Sha256: 667082872e4cfed4a8529d0b77194aef26a9bd1c22d1593c2d3927e618b1451e
Contents?: true
Size: 842 Bytes
Versions: 1
Compression:
Stored size: 842 Bytes
Contents
# frozen_string_literal: true

require "murmurhash3"

module Minhash
  # Generates Minhash signatures: for each of a fixed family of seeded
  # MurmurHash3 (32-bit) hash functions, the signature holds the smallest
  # hash value observed over the elements of a set.
  class Minhash
    # Bit mask that wraps a derived seed into the unsigned 32-bit range.
    SEED_MASK = 0xFFFFFFFF
    private_constant :SEED_MASK

    # @return [Integer] the base seed the per-function seeds are derived from
    attr_reader :seed_root

    # Builds +n_hashes+ hash functions seeded with consecutive values
    # starting at +seed_root+.
    #
    # @param n_hashes [Integer] number of hash functions (signature length)
    # @param seed_root [Integer] base seed; random in 0...2**32 by default
    def initialize(n_hashes = 1, seed_root = rand(2**32))
      @seed_root = seed_root
      @hashes = Array.new(n_hashes) do |offset|
        # seed_root + offset can exceed 2**32 - 1 when seed_root is near the
        # top of its range; wrap it so it stays a valid 32-bit seed. Seeds
        # already in range are left untouched.
        # NOTE(review): behaviour of the murmurhash3 gem for out-of-range
        # seeds was not verified here - confirm against the gem.
        seed = (seed_root + offset) & SEED_MASK
        ->(x) { MurmurHash3::V32.str_hash(x, seed) }
      end
    end

    # Produces the Minhash signature for a given Set
    #
    # Every element is passed to MurmurHash3::V32.str_hash, so elements are
    # presumably expected to be Strings (verify against callers).
    #
    # @param set [Set] the set to produce the signature for (any object
    #   responding to #each works)
    #
    # @return [Array<Integer>] array of length n_hashes holding, per hash
    #   function, the minimum hash value over the elements. For an empty
    #   set every entry is Float::INFINITY, since no hash value is seen.
    def signature(set)
      minimums = Array.new(@hashes.length, Float::INFINITY)
      set.each do |elem|
        @hashes.each_with_index do |hash_func, i|
          value = hash_func.call(elem)
          minimums[i] = value if value < minimums[i]
        end
      end
      minimums
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
doc_sim-0.1.0 | lib/doc_sim/minhash.rb |