Sha256: a44f4de8b49fb49e9d9a2155e33f3010e9f8918e2a1b1a61e3a7d9f5088696b7

Contents?: true

Size: 1.51 KB

Versions: 66

Compression:

Stored size: 1.51 KB

Contents

# frozen_string_literal: true

require 'bitarray'

module SplitIoClient
  module Cache
    module Filter
      class BloomFilter
        def initialize(capacity, false_positive_probability = 0.001)
          @capacity = capacity.round
          m = best_m(capacity, false_positive_probability)
          @ba = BitArray.new(m.round)
          @k = best_k(capacity)
        end

        def add(string)
          return false if contains?(string)

          positions = hashes(string)
          
          positions.each { |position| @ba[position] = 1 }

          true
        end
      
        def contains?(string)
          !hashes(string).any? { |ea| @ba[ea] == 0 }
        end

        def clear
          @ba.size.times { |i| @ba[i] = 0 }
        end
      
        private

        # m is the required number of bits in the array
        def best_m(capacity, false_positive_probability)
          -(capacity * Math.log(false_positive_probability)) / (Math.log(2) ** 2)
        end

        # k is the number of hash functions that minimizes the probability of false positives
        def best_k(capacity)
          (Math.log(2) * (@ba.size / capacity)).round
        end

        def hashes(data)
          m = @ba.size
          h = Digest::MD5.hexdigest(data.to_s).to_i(16)
          x = h % m
          h /= m
          y = h % m
          h /= m
          z = h % m
          [x] + 1.upto(@k - 1).collect do |i|
            x = (x + y) % m
            y = (y + z) % m
            x
          end
        end
      end
    end
  end
end

Version data entries

66 entries across 66 versions & 1 rubygems

Version Path
splitclient-rb-8.4.0-java lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.4.0 lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.4.0.pre.rc1-java lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.4.0.pre.rc1 lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.4.0.rc.1-java lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.4.0.rc.1 lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.3.2.pre.rc2-java lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.3.2.pre.rc2 lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.3.2.pre.rc1-java lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.3.2.pre.rc1 lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.3.1-java lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.3.1 lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.3.1.pre.rc1-java lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.3.1.pre.rc1 lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.3.0-java lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.3.0 lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.3.0.pre.rc3-java lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.3.0.pre.rc3 lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.3.0.pre.rc2-java lib/splitclient-rb/cache/filter/bloom_filter.rb
splitclient-rb-8.3.0.pre.rc2 lib/splitclient-rb/cache/filter/bloom_filter.rb