Sha256: 0e1698c7e360fc98e0cb15433bb4c14cdf4b68508aafa63f6d410d308f0b1b82

Contents?: true

Size: 1.89 KB

Versions: 1

Compression:

Stored size: 1.89 KB

Contents

# encoding: utf-8
#
require 'spec_helper'

require 'stemmer'

describe 'stemming' do
  # Returns the Stemmer module, patched so that it also answers to `stem`.
  #
  # Fast stemmer does not conform with the API: it offers `stem_word`, so
  # `stem` is added as a class-level alias for it. The patch is applied the
  # first time an example calls `stemmer`; this is the single place in this
  # spec where Stemmer is reopened.
  #
  let(:stemmer) {
    module Stemmer
      class << self
        # stem_word is a bit silly, what else would you stem???
        #
        alias_method :stem, :stem_word
      end
    end
    Stemmer
  }

  describe 'examples' do
    # Tokenizer level: different inflections of the same word must all be
    # reduced to the same stem.
    #
    it 'works correctly' do
      tokenizer = Picky::Tokenizer.new(stems_with: stemmer)

      tokenizer.stem('computers').should == 'comput'
      tokenizer.stem('computing').should == 'comput'
      tokenizer.stem('computed').should  == 'comput'
      tokenizer.stem('computer').should  == 'comput'
    end

    # Index/search level: a stemmed query only finds something when stemming
    # is configured on BOTH the indexing and the searching side.
    #
    it 'stems right' do
      # Fetch the patched module into a local before entering the
      # configuration blocks. A local stays visible inside those blocks even
      # if they are evaluated with a different self (presumably the case for
      # Picky's DSL blocks - TODO confirm), where the `stemmer` helper itself
      # might not be reachable.
      #
      patched_stemmer = stemmer

      index = Picky::Index.new :stemming do
        # Be aware that if !s are not removed from
        # eg. Lemming!, then stemming won't work.
        #
        indexing removes_characters: /[^a-z\s]/i,
                 stems_with: patched_stemmer
        category :text
      end

      index.replace_from id: 1, text: "Hello good Sirs, these things here need stems to work!"
      index.replace_from id: 2, text: "Stemming Lemming!"

      try = Picky::Search.new index

      # If you don't stem in the search, it should not be found!
      #
      try.search("text:stemming").ids.should == []

      try = Picky::Search.new index do
        searching stems_with: patched_stemmer
      end

      # With stemming in search AND indexing, it works :)
      #
      try.search("text:stemming").ids.should == [2, 1]
      try.search("text:lem").ids.should == [2]
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
picky-4.6.6 spec/functional/stemming_spec.rb