Sha256: c0679ee985bb936c1cf8005826a191cee47f02a06a8864b2d976d57fa426e0c4

Contents?: true

Size: 1.72 KB

Versions: 1

Compression:

Stored size: 1.72 KB

Contents

require 'zlib'

class BloomFilter
  BITS_PER_FIXNUM = 31 # this can really be more on 64-bit systems
  DUMP_SEPARATOR = "\n"
  
  def self.optimal_values(estimated_elements, probability)
    m = -(estimated_elements * Math.log(probability)) / (Math.log(2) ** 2)
    k = 0.7 * (m / estimated_elements)
    [m.round, k.round]
  end
  
  def self.read(file)
    m = file.gets(DUMP_SEPARATOR).to_i
    k = file.gets(DUMP_SEPARATOR).to_i
    bits = Array.new((m.to_f / BITS_PER_FIXNUM).ceil, 0)
    index = 0
    while line = file.gets(DUMP_SEPARATOR)
      bits[index] = line.to_i
      index += 1
    end
    [m, k, bits]
  end
  
  def self.load(dumped)
    m, k, *bits = dumped.split(DUMP_SEPARATOR).collect { |v| v.to_i }
    new(m, k, bits)
  end
  
  def initialize(m, k, bits = nil)
    @k = k
    @m = m
    @bits = bits || Array.new((m.to_f / BITS_PER_FIXNUM).ceil, 0)
  end
  
  def add(el)
    @k.times do |i|
      self.set_bit(Zlib.crc32("#{i}#{el}") % @m)
    end
    self
  end
  
  alias_method :<<, :add
  
  def include?(el)
    @k.times do |i|
      return false if !bit_set?(Zlib.crc32("#{i}#{el}") % @m)
    end
    true
  end
  
  def &(els)
    els.select { |el| self.include?(el) }
  end
  
  def dump
    [@m, @k, *@bits].join(DUMP_SEPARATOR)
  end
  
  def dup
    self.class.new(@m, @k, @bits.dup)
  end
  
  def replace(*args)
    m, k, bits = if args.size == 1
      self.class.read(args.first)
    else
      args
    end
    @m, @k = m, k
    @bits.replace(bits)
  end
  
  protected
  def set_bit(n)
    index, offset = n / BITS_PER_FIXNUM, n % BITS_PER_FIXNUM
    @bits[index] |= 1 << offset
  end
  
  def bit_set?(n)
    index, offset = n / BITS_PER_FIXNUM, n % BITS_PER_FIXNUM
    (@bits[index] & (1 << offset)) > 0
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
bloom_filter-0.6.6 lib/bloom_filter.rb