test/lsi/lsi_test.rb in yury-classifier-1.3.4 vs test/lsi/lsi_test.rb in yury-classifier-1.3.5

- old
+ new

@@ -6,10 +6,13 @@ @str1 = "This text deals with dogs. Dogs." @str2 = "This text involves dogs too. Dogs! " @str3 = "This text revolves around cats. Cats." @str4 = "This text also involves cats. Cats!" @str5 = "This text involves birds. Birds." + @str6 = "Is it about dogs or birds?" + @str7 = "Is it about birds or cats?" + @str8 = "I would prefer a bird over thousand cats or dogs because birds are smaller." end def test_basic_indexing lsi = Classifier::LSI.new [@str1, @str2, @str3, @str4, @str5].each { |x| lsi << x } @@ -27,21 +30,62 @@ assert lsi.needs_rebuild? lsi.build_index assert ! lsi.needs_rebuild? end + def test_basic_categorizing_with_too_small_dataset + lsi = Classifier::LSI.new + lsi.add_item @str2, "Dog" + + assert_equal nil, lsi.classify( @str1 ) + assert_equal [], lsi.classify_multiple( @str3 ) + end + def test_basic_categorizing lsi = Classifier::LSI.new lsi.add_item @str2, "Dog" lsi.add_item @str3, "Cat" lsi.add_item @str4, "Cat" lsi.add_item @str5, "Bird" assert_equal "Dog", lsi.classify( @str1 ) assert_equal "Cat", lsi.classify( @str3 ) - assert_equal "Bird", lsi.classify( @str5 ) + assert_equal "Bird", lsi.classify( @str5 ) + assert_equal "Dog", lsi.classify( @str6 ) + assert_equal "Bird", lsi.classify( @str7 ) + assert_equal "Bird", lsi.classify( @str8 ) end - + + def test_multiple_categorizing + lsi = Classifier::LSI.new + lsi.add_item @str1, "Dog" + lsi.add_item @str2, "Dog" + lsi.add_item @str3, "Cat" + lsi.add_item @str4, "Cat" + lsi.add_item @str5, "Bird" + + assert_equal ["Dog", "Bird"], lsi.classify_multiple( @str6 ) + assert_equal ["Cat", "Bird"], lsi.classify_multiple( @str7 ) + assert_equal ["Bird"], lsi.classify_multiple( @str8 ) + end + + def test_multiple_categorizing_reverse + lsi = Classifier::LSI.new + lsi.add_item @str1, "Dog" + lsi.add_item @str3, "Cat" + lsi.add_item @str4, "Cat" + lsi.add_item @str6, "Dog", "Bird", "Flying" + lsi.add_item @str7, "Cat", "Bird" + lsi.add_item @str8, "Bird", "Dog", "Cat" + + assert_equal ["Dog"], lsi.classify_multiple( @str2 ) + assert_equal ["Cat", "Bird"], lsi.classify_multiple( @str5 ) + + # test with a word unknown alone + assert_equal "Bird", lsi.classify( "Bird!" ) + assert_equal ["Bird", "Dog", "Cat"], lsi.classify_multiple( "Bird!" ) + end + def test_external_classifying lsi = Classifier::LSI.new bayes = Classifier::Bayes.new :categories => ['Dog', 'Cat', 'Bird'] lsi.add_item @str1, "Dog" ; bayes.train_dog @str1 lsi.add_item @str2, "Dog" ; bayes.train_dog @str2 \ No newline at end of file