spec/groupie_spec.rb in groupie-0.2.2 vs spec/groupie_spec.rb in groupie-0.3.0

- old
+ new

@@ -71,13 +71,39 @@
           g[:spam].add %w[word] * 1000
           g[:ham].add %w[word] * 10
           g.classify('word', :log).should == {:spam=>0.75, :ham=>0.25}
         end
       end
+
+      describe "unique" do
+        it "should should behave as sqrt strategy" do
+          g = Groupie.new
+          g[:spam].add %w[buy viagra now]
+          g[:ham].add %w[buy flowers now]
+          g.classify('buy', :unique).should == g.classify('buy', :sqrt)
+          g.classify('flowers', :unique).should == g.classify('flowers', :sqrt)
+        end
+      end
     end
   end
 
+  describe "unique_words" do
+    it "should exclude all words in the 4th quintile of all groups" do
+      g = Groupie.new
+      g[:spam].add %w[one two two three three three four four four four]
+      g[:ham].add %w[apple banana pear orange three]
+      g.unique_words.sort.should == %w[one two apple banana pear orange].sort
+    end
+
+    it "should work on an empty word set" do
+      g = Groupie.new
+      g[:spam].add []
+      g[:ham].add []
+      g.unique_words.should == []
+    end
+  end
+
   context "classify_text" do
     it 'should tokenized html emails' do
       g = Groupie.new
       spam_tokens = File.read(File.join(File.dirname(__FILE__), %w[fixtures spam spam.la-44118014.txt])).tokenize
       ham_tokens = File.read(File.join(File.dirname(__FILE__), %w[fixtures ham spam.la-44116217.txt])).tokenize
@@ -106,25 +132,32 @@
       g = Groupie.new
       g[:spam].add %w[buy viagra now]
       g[:ham].add %w[buy flowers now]
       g.classify_text(%w[buy buckets now]).should == {:spam=>0.5, :ham=>0.5}
     end
-
+
     it "should support the sqrt strategy" do
       g = Groupie.new
       g[:spam].add %w[one] * 9
       g[:ham].add %w[one]
       g[:spam].add %w[two] * 9
       g[:ham].add %w[two]
       g.classify_text(%w[one two three], :sqrt).should == {:spam=>0.75, :ham=>0.25}
     end
-
+
     it "should support the log strategy" do
       g = Groupie.new
       g[:spam].add %w[one] * 100
       g[:ham].add %w[one]
       g[:spam].add %w[two]
       g[:ham].add %w[two] * 100
       g.classify_text(%w[one two three], :log).should == {:spam=>0.5, :ham=>0.5}
+    end
+
+    it "should only rate unique words for the unique strategy" do
+      g = Groupie.new
+      g[:spam].add %w[one two two three three three four four four four]
+      g[:ham].add %w[apple banana pear]
+      g.classify_text(%w[one two three apple banana], :unique).should == {:spam=>0.5, :ham=>0.5}
     end
   end
 end