spec/groupie_spec.rb in groupie-0.1.1 vs spec/groupie_spec.rb in groupie-0.2.2

- old
+ new

@@ -43,11 +43,41 @@
       c = g.classify('discreetly')
       c[:spam].should > c[:ham]
       c2 = g.classify('user')
       c2[:ham].should > c2[:spam]
     end
+
+    describe "strategies" do
+      describe "sum" do
+        it "should weigh words for the sum of their occurances" do
+          g = Groupie.new
+          g[:spam].add %w[word] * 9
+          g[:ham].add %w[word]
+          g.classify('word', :sum).should == {:spam=>0.9, :ham=>0.1}
+        end
+      end
+
+      describe "sqrt" do
+        it "should weigh words for the square root of the sum of ocurances" do
+          g = Groupie.new
+          g[:spam].add %w[word] * 9
+          g[:ham].add %w[word]
+          g.classify('word', :sqrt).should == {:spam=>0.75, :ham=>0.25}
+        end
+      end
+
+      describe "log" do
+        it "should weigh words for log10 of their sum of occurances" do
+          g = Groupie.new
+          g[:spam].add %w[word] * 1000
+          g[:ham].add %w[word] * 10
+          g.classify('word', :log).should == {:spam=>0.75, :ham=>0.25}
+        end
+      end
+    end
   end
+
   context "classify_text" do
     it 'should tokenized html emails' do
       g = Groupie.new
       spam_tokens = File.read(File.join(File.dirname(__FILE__), %w[fixtures spam spam.la-44118014.txt])).tokenize
       ham_tokens = File.read(File.join(File.dirname(__FILE__), %w[fixtures ham spam.la-44116217.txt])).tokenize
@@ -68,8 +98,33 @@
       g[:ham].add %w[buy flowers for your mom]
       result = g.classify_text "Grow flowers to sell on our website".tokenize
       result[:spam].should > result[:ham]
       result2 = g.classify_text "Grow flowers to give to your mom".tokenize
       result2[:ham].should == result2[:spam]
+    end
+
+    it "should skip unknown tokens" do
+      g = Groupie.new
+      g[:spam].add %w[buy viagra now]
+      g[:ham].add %w[buy flowers now]
+      g.classify_text(%w[buy buckets now]).should == {:spam=>0.5, :ham=>0.5}
+    end
+
+    it "should support the sqrt strategy" do
+      g = Groupie.new
+      g[:spam].add %w[one] * 9
+      g[:ham].add %w[one]
+      g[:spam].add %w[two] * 9
+      g[:ham].add %w[two]
+      g.classify_text(%w[one two three], :sqrt).should == {:spam=>0.75, :ham=>0.25}
+    end
+
+    it "should support the log strategy" do
+      g = Groupie.new
+      g[:spam].add %w[one] * 100
+      g[:ham].add %w[one]
+      g[:spam].add %w[two]
+      g[:ham].add %w[two] * 100
+      g.classify_text(%w[one two three], :log).should == {:spam=>0.5, :ham=>0.5}
     end
   end
 end