Sha256: a44f4de8b49fb49e9d9a2155e33f3010e9f8918e2a1b1a61e3a7d9f5088696b7
Contents?: true
Size: 1.51 KB
Versions: 66
Compression:
Stored size: 1.51 KB
Contents
# frozen_string_literal: true require 'bitarray' module SplitIoClient module Cache module Filter class BloomFilter def initialize(capacity, false_positive_probability = 0.001) @capacity = capacity.round m = best_m(capacity, false_positive_probability) @ba = BitArray.new(m.round) @k = best_k(capacity) end def add(string) return false if contains?(string) positions = hashes(string) positions.each { |position| @ba[position] = 1 } true end def contains?(string) !hashes(string).any? { |ea| @ba[ea] == 0 } end def clear @ba.size.times { |i| @ba[i] = 0 } end private # m is the required number of bits in the array def best_m(capacity, false_positive_probability) -(capacity * Math.log(false_positive_probability)) / (Math.log(2) ** 2) end # k is the number of hash functions that minimizes the probability of false positives def best_k(capacity) (Math.log(2) * (@ba.size / capacity)).round end def hashes(data) m = @ba.size h = Digest::MD5.hexdigest(data.to_s).to_i(16) x = h % m h /= m y = h % m h /= m z = h % m [x] + 1.upto(@k - 1).collect do |i| x = (x + y) % m y = (y + z) % m x end end end end end end
Version data entries
66 entries across 66 versions & 1 rubygems