# encoding: utf-8
#
require 'spec_helper'

require 'stemmer'
require 'lingua/stemmer'

describe 'stemming' do
  
  describe 'per-index stemming' do
    let(:stemmer) do
      # Fast stemmer does not conform with the API: it only offers
      # Stemmer.stem_word, so expose the same method as Stemmer.stem too.
      #
      Stemmer.singleton_class.send(:alias_method, :stem, :stem_word)
      Stemmer
    end
    
    it 'works correctly' do
      tokenizer = Picky::Tokenizer.new(stems_with: stemmer)

      # Each of these word forms is expected to yield the same stem.
      #
      %w[computers computing computed computer].each do |word|
        tokenizer.stem(word).should == 'comput'
      end
    end
    
    # This tests the stems_with option.
    #
    it 'stems right (API conform Stemmer)' do
      # Reuse the already patched module from let(:stemmer) instead of
      # repeating the stem -> stem_word alias here. It is bound to a local
      # first so it stays reachable inside the Index.new block, whatever
      # self happens to be in there.
      #
      api_conform_stemmer = stemmer

      index = Picky::Index.new :stemming do
        # Be aware that if !s are not removed from
        # eg. Lemming!, then stemming won't work.
        #
        indexing removes_characters: /[^a-z\s]/i,
                 stems_with: api_conform_stemmer
        category :text
      end

      index.replace_from id: 1, text: "Hello good Sirs, these things here need stems to work!"
      index.replace_from id: 2, text: "Stemming Lemming!"

      try = Picky::Search.new index

      # Stems for both, so finds both.
      #
      try.search("text:stemming").ids.should == [2, 1]
      try.search("text:lem").ids.should == [2]
    end
    
    # This tests the stems_with option.
    #
    it 'stems right (Lingua::Stemmer.new)' do
      # This time a stemmer instance is handed to stems_with.
      #
      lingua = Lingua::Stemmer.new

      index = Picky::Index.new(:stemming) do
        # Non-letters must be removed up front: with the "!" left in
        # place (eg. Lemming!), stemming won't work.
        #
        indexing removes_characters: /[^a-z\s]/i, stems_with: lingua
        category :text
      end

      documents = [
        [1, 'Hello good Sirs, these things here need stems to work!'],
        [2, 'Stemming Lemming!']
      ]
      documents.each { |id, text| index.replace_from(id: id, text: text) }

      searcher = Picky::Search.new(index)

      searcher.search('text:stemming').ids.should == [2, 1]
      searcher.search('text:lem').ids.should == [2]
    end
  end
  
  describe 'per-category stemming' do
    describe 'mixed stemming categories' do
      it 'stems some but not others' do
        index = Picky::Index.new(:stemming) do
          # Non-letters are stripped index-wide; if eg. a "!" were left on
          # a word, stemming won't work for it.
          #
          indexing removes_characters: /[^a-z\s]/i

          # Only text1 gets a stemmer; text2 is indexed without one.
          # Partial matching is switched off for both categories.
          #
          category :text1,
                   partial: Picky::Partial::None.new,
                   indexing: { stems_with: Lingua::Stemmer.new }
          category :text2,
                   partial: Picky::Partial::None.new
        end

        index.replace_from(id: 1, text1: 'stemming', text2: 'ios')
        index.replace_from(id: 2, text1: 'ios', text2: 'stemming')

        searcher = Picky::Search.new(index)

        # Query => expected ids, checked in this order.
        #
        expectations = [
          # Full words are found in whichever category holds them.
          ['text1:stemming', [1]],
          ['text2:ios',      [1]],
          ['text1:ios',      [2]],
          ['text2:stemming', [2]],
          # Shortened forms only hit in the stemmed category.
          ['text1:stem',     [1]],
          ['text2:io',       []],
          ['text1:io',       [2]],
          ['text2:stem',     []]
        ]

        expectations.each do |query, expected_ids|
          searcher.search(query).ids.should == expected_ids
        end
      end
    end
  end
end