Sha256: c2cb64b206aff56679756ff6838f2b790d9ccc3ad3a315c5a160fb51d190910d
Contents?: true
Size: 917 Bytes
Versions: 1
Compression:
Stored size: 917 Bytes
Contents
# frozen_string_literal: true

require "murmurhash3"

module Minhash
  # Computes Minhash signatures using a family of MurmurHash3::V32 string
  # hashes that differ only in their seed.
  class Minhash
    # Initial value of every signature slot. Hashes will always be <= 2**32,
    # so any hashed element replaces this value; it only remains in the
    # signature when the input set yields no elements.
    HASH_MAX = (2**32) + 1

    attr_reader :seed_root

    # Builds the hash family.
    #
    # @param n_hashes [Integer] number of hash functions, i.e. the length of
    #   every signature produced by this instance
    # @param seed_root [Integer] base seed; the hash function at index i is
    #   seeded with seed_root + i. Defaults to a random value below 2**32.
    def initialize(n_hashes = 1, seed_root = rand(2**32))
      @seed_root = seed_root
      @hashes = Array.new(n_hashes) do |offset|
        lambda do |value|
          MurmurHash3::V32.str_hash(value, seed_root + offset)
        end
      end
    end

    # Produces the Minhash signature for a given Set: for each hash function,
    # the smallest hash value seen over all elements of the set.
    #
    # @param set [Set[String]] the set to produce the signature for
    #
    # @return [Array[Integer]] 32 bit integer array of length n_hashes
    #   (every slot is HASH_MAX when the set yields no elements)
    def signature(set)
      minima = Array.new(@hashes.length, HASH_MAX)

      set.each do |element|
        @hashes.each_with_index do |hasher, slot|
          candidate = hasher.call(element)
          # Keep the running minimum for this hash function.
          minima[slot] = candidate if candidate < minima[slot]
        end
      end

      minima
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
doc_sim-0.1.1 | lib/doc_sim/minhash.rb |