Sha256: c0679ee985bb936c1cf8005826a191cee47f02a06a8864b2d976d57fa426e0c4
Contents?: true
Size: 1.72 KB
Versions: 1
Compression:
Stored size: 1.72 KB
Contents
require 'zlib' class BloomFilter BITS_PER_FIXNUM = 31 # this can really be more on 64-bit systems DUMP_SEPARATOR = "\n" def self.optimal_values(estimated_elements, probability) m = -(estimated_elements * Math.log(probability)) / (Math.log(2) ** 2) k = 0.7 * (m / estimated_elements) [m.round, k.round] end def self.read(file) m = file.gets(DUMP_SEPARATOR).to_i k = file.gets(DUMP_SEPARATOR).to_i bits = Array.new((m.to_f / BITS_PER_FIXNUM).ceil, 0) index = 0 while line = file.gets(DUMP_SEPARATOR) bits[index] = line.to_i index += 1 end [m, k, bits] end def self.load(dumped) m, k, *bits = dumped.split(DUMP_SEPARATOR).collect { |v| v.to_i } new(m, k, bits) end def initialize(m, k, bits = nil) @k = k @m = m @bits = bits || Array.new((m.to_f / BITS_PER_FIXNUM).ceil, 0) end def add(el) @k.times do |i| self.set_bit(Zlib.crc32("#{i}#{el}") % @m) end self end alias_method :<<, :add def include?(el) @k.times do |i| return false if !bit_set?(Zlib.crc32("#{i}#{el}") % @m) end true end def &(els) els.select { |el| self.include?(el) } end def dump [@m, @k, *@bits].join(DUMP_SEPARATOR) end def dup self.class.new(@m, @k, @bits.dup) end def replace(*args) m, k, bits = if args.size == 1 self.class.read(args.first) else args end @m, @k = m, k @bits.replace(bits) end protected def set_bit(n) index, offset = n / BITS_PER_FIXNUM, n % BITS_PER_FIXNUM @bits[index] |= 1 << offset end def bit_set?(n) index, offset = n / BITS_PER_FIXNUM, n % BITS_PER_FIXNUM (@bits[index] & (1 << offset)) > 0 end end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
bloom_filter-0.6.6 | lib/bloom_filter.rb |