Sha256: 91a4fa011ab4cbd0f7e3dad23047a75655ea8ff676fb4152df3a81fa4c24d4ed
Contents?: true
Size: 1.24 KB
Versions: 1
Compression:
Stored size: 1.24 KB
Contents
#!/usr/local/bin/ruby -w

require 'fileutils'
require 'test/unit'
require 'tmpdir'
require 'rsi/analysis'
require 'rsi/index' # for FIELD_TYPE_TEXT

# Exercises RSI::DefaultTextAnalyzer: the field types it reports and the
# term lists it produces under the 'text' key when tokenizing sample strings.
class AnalTest < Test::Unit::TestCase
  DOC_A = "Weebles wobble but they don't fall down"
  DOC_B = "The boot is a whale, then?"

  # Creates a fresh analyzer before every test.
  def setup
    @analyzer = RSI::DefaultTextAnalyzer.new
  end

  # Nothing to clean up.
  def teardown; end

  # The analyzer reports exactly one field type, keyed 'text'.
  def test_types
    field_types = @analyzer.get_field_types
    assert_equal(1, field_types.size, "Only one type")
    assert_equal(RSI::FIELD_TYPE_TEXT, field_types['text'], "Text type dictionary")
  end

  # Checks term counts and membership for the two sample documents.
  def test_text
    terms_a = @analyzer.tokenize(DOC_A)['text']
    # Debug aid: puts terms_a.join(":")
    assert_equal(5, terms_a.size, "Phrase contains 5 interesting terms")
    assert(terms_a.include?("DONT"), "DONT should be in termlist")

    terms_b = @analyzer.tokenize(DOC_B)['text']
    assert_equal(3, terms_b.size, "Phrase contains 3 non-stopwords")
    assert(!terms_b.include?("THE"), "The is a stopword")
  end

  # Tokenizes a string mixing singular/plural word pairs with "The", "an",
  # "a"; expects five terms and no "THE" among them.
  # NOTE(review): the failure message suggests the count depends on how
  # porter.rb stems 'children' -- confirm against the stemmer.
  def test_stopwords
    source = "dog dogs buggy buggies child children The an a stop"
    terms = @analyzer.tokenize(source)['text']
    # Debug aid: puts terms.join(":")
    assert_equal(5, terms.size, "porter.rb doesn't stem 'children' right")
    assert(!terms.include?("THE"), "The is a stopword")
  end
end
Version data entries
1 entry across 1 version & 1 rubygem
Version | Path |
---|---|
rsi-0.4 | tests/t_analysis.rb |