spec/groupie_spec.rb in groupie-0.2.2 vs spec/groupie_spec.rb in groupie-0.3.0
- old
+ new
@@ -71,13 +71,39 @@
g[:spam].add %w[word] * 1000
g[:ham].add %w[word] * 10
g.classify('word', :log).should == {:spam=>0.75, :ham=>0.25}
end
end
+
+ describe "unique" do
+ it "should should behave as sqrt strategy" do
+ g = Groupie.new
+ g[:spam].add %w[buy viagra now]
+ g[:ham].add %w[buy flowers now]
+ g.classify('buy', :unique).should == g.classify('buy', :sqrt)
+ g.classify('flowers', :unique).should == g.classify('flowers', :sqrt)
+ end
+ end
end
end
+ describe "unique_words" do
+ it "should exclude all words in the 4th quintile of all groups" do
+ g = Groupie.new
+ g[:spam].add %w[one two two three three three four four four four]
+ g[:ham].add %w[apple banana pear orange three]
+ g.unique_words.sort.should == %w[one two apple banana pear orange].sort
+ end
+
+ it "should work on an empty word set" do
+ g = Groupie.new
+ g[:spam].add []
+ g[:ham].add []
+ g.unique_words.should == []
+ end
+ end
+
context "classify_text" do
it 'should tokenized html emails' do
g = Groupie.new
spam_tokens = File.read(File.join(File.dirname(__FILE__), %w[fixtures spam spam.la-44118014.txt])).tokenize
ham_tokens = File.read(File.join(File.dirname(__FILE__), %w[fixtures ham spam.la-44116217.txt])).tokenize
@@ -106,25 +132,32 @@
g = Groupie.new
g[:spam].add %w[buy viagra now]
g[:ham].add %w[buy flowers now]
g.classify_text(%w[buy buckets now]).should == {:spam=>0.5, :ham=>0.5}
end
-
+
it "should support the sqrt strategy" do
groupie = Groupie.new
groupie[:spam].add(%w[one] * 9) # 'one' is added nine times to :spam ...
groupie[:ham].add(%w[one]) # ... and once to :ham
groupie[:spam].add(%w[two] * 9) # same 9-to-1 ratio for 'two'
groupie[:ham].add(%w[two])
groupie.classify_text(%w[one two three], :sqrt).should == { :spam => 0.75, :ham => 0.25 } # 'three' was never added to any group
end
-
+
it "should support the log strategy" do
g = Groupie.new
g[:spam].add %w[one] * 100
g[:ham].add %w[one]
g[:spam].add %w[two]
g[:ham].add %w[two] * 100
g.classify_text(%w[one two three], :log).should == {:spam=>0.5, :ham=>0.5}
+ end
+
+ it "should only rate unique words for the unique strategy" do
+ g = Groupie.new
+ g[:spam].add %w[one two two three three three four four four four]
+ g[:ham].add %w[apple banana pear]
+ g.classify_text(%w[one two three apple banana], :unique).should == {:spam=>0.5, :ham=>0.5}
end
end
end