RubygemsResearch

Sha256: 0504cc89c71457751027bba299345bda00c35d90b86a2a2349d247713262d18a

Contents?: true

Size: 1.31 KB

Versions: 2

Compression:

Stored size: 1.31 KB

# encoding: utf-8
#
require 'spec_helper'

# TODO: Clean up this spec.
#
# Examples for Tokenizers::Index as built with no constructor arguments:
# illegal-character removal, rejection of blank tokens, and tokenizing.
describe Tokenizers::Index do

  # Every example gets its own tokenizer instance.
  before(:each) { @tokenizer = Tokenizers::Index.new }

  describe "remove_illegal_characters" do
    it "should not remove ' from a query by default" do
      cleaned = @tokenizer.remove_illegals("Lugi's")

      cleaned.should == "Lugi's"
    end
  end

  describe "reject!" do
    it "should reject tokens if blank" do
      # Stand-in tokens; only their #to_s value is stubbed.
      leading_blank  = stub(:token, :to_s => '')
      present        = stub(:token, :to_s => 'not blank')
      trailing_blank = stub(:token, :to_s => '')

      remaining = @tokenizer.reject([leading_blank, present, trailing_blank])

      remaining.should == [present]
    end
  end

  describe "tokenize" do
    describe "normalizing" do
      # Defines one example: tokenizing +input+ must yield exactly +token+,
      # or an empty list when +token+ is nil (hence the compact).
      def self.it_should_normalize_token(input, token)
        it "should handle the #{input} case" do
          actual = @tokenizer.tokenize(input).to_a

          actual.should == [token].compact
        end
      end

      # defaults
      it_should_normalize_token('it_should_not_normalize_by_default', :it_should_not_normalize_by_default)
    end

    describe "tokenizing" do
      # Defines one example: tokenizing +input+ must yield the list +tokens+.
      def self.it_should_tokenize_token(input, tokens)
        it "should handle the #{input} case" do
          actual = @tokenizer.tokenize(input).to_a

          actual.should == tokens
        end
      end

      # defaults, as [input text, expected tokens] pairs
      [
        ["splitting on \\s", [:splitting, :on, :"\\s"]],
        ['und', [:und]]
      ].each do |input, tokens|
        it_should_tokenize_token(input, tokens)
      end
    end
  end

end

Version data entries

2 entries across 2 versions & 1 rubygems

Version	Path
picky-0.0.3	spec/lib/tokenizers/index_spec.rb
picky-0.0.2	spec/lib/tokenizers/index_spec.rb