Sha256: 342d63ef970182678e80edfce1130170cb1a4f9a0a149d768208c34224f119f4

Contents?: true

Size: 1.32 KB

Versions: 4

Compression:

Stored size: 1.32 KB

Contents

# coding: utf-8
require 'spec_helper'

# Specs for Token: the validity predicate and the lowercase/classic filters.
module TfIdfSimilarity
  describe Token do
    describe '#valid?' do
      it 'should return false if all of its characters are numbers, punctuation or whitespace characters' do
        token = Token.new('1 2 3 ! @ #')
        token.valid?.should eq(false)
      end

      it 'should return true if not all of its characters are numbers, punctuation or whitespace characters' do
        # Same input as above plus a single letter.
        token = Token.new('1 2 3 ! @ # a')
        token.valid?.should eq(true)
      end
    end

    describe '#lowercase_filter' do
      it 'should lowercase the token' do
        # Accented capitals must be lowercased too, not only ASCII letters.
        token = Token.new('HÉTÉROGÉNÉITÉ')
        token.lowercase_filter.should eq('hétérogénéité')
      end
    end

    describe '#classic_filter' do
      it 'should remove all periods' do
        token = Token.new('X.Y.Z.')
        token.classic_filter.should eq('XYZ')
      end

      it 'should remove ending possessives' do
        token = Token.new("foo's")
        token.classic_filter.should eq('foo')
      end

      it 'should remove ending possessives with nonstandard apostrophe 1' do
        # Backtick used in place of an apostrophe.
        token = Token.new("foo`s")
        token.classic_filter.should eq('foo')
      end

      it 'should remove ending possessives with nonstandard apostrophe 2' do
        # Right single quotation mark used in place of an apostrophe.
        token = Token.new("foo’s")
        token.classic_filter.should eq('foo')
      end

      it 'should not remove infix possessives' do
        # The possessive is followed by more text, so the token stays as is.
        unchanged = "foo's bar"
        token = Token.new("foo's bar")
        token.classic_filter.should eq(unchanged)
      end
    end
  end
end

Version data entries

4 entries across 4 versions & 1 rubygem

Version Path
tf-idf-similarity-0.3.0 spec/token_spec.rb
tf-idf-similarity-0.2.0 spec/token_spec.rb
tf-idf-similarity-0.1.6 spec/token_spec.rb
tf-idf-similarity-0.1.5 spec/token_spec.rb