Sha256: 91a4fa011ab4cbd0f7e3dad23047a75655ea8ff676fb4152df3a81fa4c24d4ed

Contents?: true

Size: 1.24 KB

Versions: 1

Compression:

Stored size: 1.24 KB

Contents

#!/usr/local/bin/ruby -w
require 'fileutils'
require 'test/unit'
require 'tmpdir'
require 'rsi/analysis'
require 'rsi/index' # for FIELD_TYPE_TEXT

# Unit tests for RSI::DefaultTextAnalyzer: checks the field types it reports
# and the term lists it returns under the 'text' key when tokenizing strings.
class AnalTest < Test::Unit::TestCase

  DOC_A = "Weebles wobble but they don't fall down"
  DOC_B = "The boot is a whale, then?"

  # Every test gets its own freshly constructed analyzer.
  def setup
    @a = RSI::DefaultTextAnalyzer.new
  end

  # No per-test cleanup is needed.
  def teardown
  end

  # The analyzer must report exactly one field type, keyed 'text' and
  # equal to RSI::FIELD_TYPE_TEXT.
  def test_types
    field_types = @a.get_field_types
    assert_equal(1, field_types.size, "Only one type")
    assert_equal(RSI::FIELD_TYPE_TEXT, field_types['text'], "Text type dictionary")
  end

  # Tokenizes the two sample documents and checks the resulting term counts,
  # that "DONT" appears for DOC_A, and that "THE" is absent for DOC_B.
  def test_text
    terms = @a.tokenize(DOC_A)['text']
    assert_equal(5, terms.size, "Phrase contains 5 interesting terms")
    assert(terms.include?("DONT"), "DONT should be in termlist")

    terms = @a.tokenize(DOC_B)['text']
    assert_equal(3, terms.size, "Phrase contains 3 non-stopwords")
    assert(!terms.include?("THE"), "The is a stopword")
  end

  # Tokenizes a phrase mixing singular/plural word pairs with stopwords and
  # expects five terms to remain, none of them "THE". The count assertion's
  # failure message points at porter.rb's handling of 'children'.
  def test_stopwords
    phrase = "dog dogs buggy buggies child children The an a stop"
    terms = @a.tokenize(phrase)['text']
    assert_equal(5, terms.size, "porter.rb doesn't stem 'children' right")
    assert(!terms.include?("THE"), "The is a stopword")
  end

end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
rsi-0.4 tests/t_analysis.rb