Sha256: b7569ce2fb9b30e3824c30a8e3af7d26ccdcbde9eb7ebb842771ea1b0a33c892
Contents?: true
Size: 943 Bytes
Versions: 2
Compression:
Stored size: 943 Bytes
Contents
# frozen_string_literal: true

# Set is only autoloaded from Ruby 3.2 onwards; require it explicitly so
# similar_pairs also works on older interpreters.
require "set"

module LocalitySensitiveHashing
  # Hashing technique which groups similar objects together.
  #
  # A signature is cut into n_bands consecutive bands of n_rows values each.
  # Every band position owns its own bucket table keyed by the band's values,
  # so two documents land in the same bucket when they have identical values
  # at the same band position.
  class LocalitySensitiveHashing
    # @param n_rows [Integer] number of signature values per band
    # @param n_bands [Integer] number of bands (one bucket table is kept per band)
    def initialize(n_rows, n_bands)
      @buckets = Array.new(n_bands) { generate_band_bucket }
      @n_rows = n_rows
    end

    # Registers doc_id in one bucket per band of its signature.
    #
    # @param signature [Array] exactly n_rows * n_bands values
    # @param doc_id [Object] identifier appended to each matching bucket
    # @raise [ArgumentError] if signature.length differs from n_rows * n_bands
    def insert(signature, doc_id)
      unless signature.length == @n_rows * @buckets.length
        raise ArgumentError, "signature length does not match n_rows and n_bands"
      end

      signature.each_slice(@n_rows).with_index do |band_signature, band_idx|
        @buckets[band_idx][band_signature] << doc_id
      end
    end

    # Collects every pair of doc ids that share at least one bucket.
    #
    # @return [Set<Array>] two-element arrays of doc ids, in insertion order
    def similar_pairs
      @buckets.each_with_object(Set.new) do |band_bucket, similar|
        band_bucket.each_value do |bucket|
          # A doc id inserted more than once appears repeatedly in a bucket;
          # de-duplicate first so we never report [a, a] or a reversed [b, a].
          similar.merge(bucket.uniq.combination(2))
        end
      end
    end

    private

    # Builds a bucket table whose missing keys default to a fresh, stored
    # array (a block default, so buckets are never shared between keys).
    def generate_band_bucket
      Hash.new { |table, key| table[key] = [] }
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygem
Version | Path |
---|---|
doc_sim-0.1.1 | lib/doc_sim/locality_sensitive_hashing.rb |
doc_sim-0.1.0 | lib/doc_sim/locality_sensitive_hashing.rb |