#!/usr/bin/env ruby

require 'rubygems'
require 'wukong'

require 'bloomfilter-rb'

# 2**24 = 16_777_216. Independent of BucketCounter, which takes its own bucket
# count through the :size option.
SIZE = 2**24

# Approximate counter backed by Redis.
#
# Every value is mapped to one of +size+ integer buckets (val.hash % size) and
# the bucket's Redis key is incremented / decremented. Distinct values that
# land in the same bucket share one counter, so a reported count can be higher
# than the true count for that value.
#
# NOTE(review): Ruby seeds String#hash per process, so bucket keys are
# presumably not comparable between separate runs -- confirm before relying on
# counts that outlive one process.
class BucketCounter
  # opts - Hash of options:
  #   :size   - number of buckets (default 100)
  #   :server - options Hash handed to Redis.new (default {})
  def initialize(opts = {})
    @opts = {
      :size   => 100,
      :server => {}
    }.merge(opts)
    @db   = ::Redis.new(@opts[:server])
    # Read from the merged options so the :size default applies when the
    # caller does not pass one (reading the raw argument left @size nil).
    @size = @opts[:size]
  end

  # Returns the Integer bucket (also the Redis key) that +val+ falls into.
  def key_for(val)
    val.hash % @size
  end

  # Increments the bucket for +val+; returns whatever the Redis client's
  # incr returns.
  def insert(val)
    @db.incr(key_for(val))
  end
  alias :<< :insert

  # Decrements the bucket for +val+ and drops the key once it reaches zero or
  # below, so that empty buckets do not linger in Redis.
  def delete(val)
    key = key_for(val)
    @db.del(key) if @db.decr(key).to_i <= 0
  end

  # Returns the Integer count stored in the bucket for +val+ (0 when the key
  # is missing, since nil.to_i is 0).
  def [](val)
    @db.get(key_for(val)).to_i
  end

  # Calls flushdb on the connection. NOTE(review): in Redis this removes every
  # key of the selected database, not only this counter's buckets.
  def clear
    @db.flushdb
  end
end

# Demo: count every word of this very file twice -- exactly in a Hash and
# approximately in a 1_000-bucket BucketCounter -- then print a comparison.
bf     = BucketCounter.new(:size => 1_000, :server => {:host => 'localhost'})
bf.clear
counts = Hash.new(0)

# scan (rather than split on /\W+/) so that a file beginning with a non-word
# character does not contribute an empty-string "word".
File.read(__FILE__).scan(/\w+/).each do |word|
  counts[word] += 1
  bf << word
end

# Columns: overcount, bucket count, exact count, bucket key, word.
# The bucket key comes from the counter itself so it matches the key that was
# actually incremented.
counts.keys.sort.each do |word|
  approx = bf[word] # read once per row instead of twice
  exact  = counts[word]
  puts [ approx - exact, approx, exact, bf.key_for(word), word ].join("\t")
end