Sha256: 32066fca200d1f36cd507ecf36474aec2ff008adac3cc510ff7bc98a1821591b

Contents?: true

Size: 1.38 KB

Versions: 2

Compression:

Stored size: 1.38 KB

Contents

require "indonesian_stemmer/version"
require "indonesian_stemmer/morphological_utility"

# Entry point of the Indonesian stemmer. All behaviour is exposed as
# singleton methods on the module itself; the affix-removal helpers
# (+remove_particle+, +remove_suffix+, ...) and +total_syllables+ are
# mixed in from MorphologicalUtility.
module IndonesianStemmer

  class << self
    include MorphologicalUtility

    # Syllable count of the word currently being stemmed. It is set at the
    # start of every single-word #stem call and gates each removal step.
    # NOTE(review): presumably the MorphologicalUtility helpers lower this
    # value as they strip affixes -- confirm against that module.
    attr_accessor :number_of_syllables

    # Stems +word+.
    #
    # The +word+ object itself is handed to the removal helpers and then
    # returned, so any change they make happens in place on the caller's
    # string.
    #
    # @param word [String] a single word, or several words separated by
    #   whitespace
    # @param derivational_stemming [Boolean] when falsy, the derivational
    #   step (prefix/suffix removal) is skipped
    # @return [String, Array<String>] +word+ itself for single-word input;
    #   for input containing whitespace, an array with the result of
    #   stemming each word separately (the original string is not modified
    #   in that case)
    def stem(word, derivational_stemming = true)
      @flags = 0

      if word =~ /\s/
        # Forward the caller's choice: recursing with the default would
        # silently re-enable derivational stemming for multi-word input.
        word.split(' ').map { |w| stem(w, derivational_stemming) }
      else
        @number_of_syllables = total_syllables word

        remove_particle(word) if still_has_many_syllables?
        remove_possessive_pronoun(word) if still_has_many_syllables?

        stem_derivational(word) if derivational_stemming

        word
      end
    end


    private
      # Runs the derivational removal steps on +word+. A change in
      # +word.size+ across a helper call is used as the signal that the
      # helper actually removed something.
      def stem_derivational(word)
        previous_size = word.size
        remove_first_order_prefix(word) if still_has_many_syllables?
        if previous_size != word.size
          # A first-order prefix was removed: try the suffix next, and only
          # look for a second-order prefix when a suffix was removed too.
          previous_size = word.size
          remove_suffix(word) if still_has_many_syllables?

          if previous_size != word.size
            remove_second_order_prefix(word) if still_has_many_syllables?
          end
        else
          # No first-order prefix: try the second-order prefix, then the
          # suffix, each guarded by the syllable check.
          remove_second_order_prefix(word) if still_has_many_syllables?
          remove_suffix(word) if still_has_many_syllables?
        end
      end

      # True while the tracked syllable count is greater than two; every
      # removal step is skipped once it is not.
      def still_has_many_syllables?
        @number_of_syllables > 2
      end
  end
end

# Core extension that makes the stemmer available directly on strings.
class String
  # Stems a duplicate of this string, so the receiver is never handed to
  # the stemmer.
  #
  # @return [String, Array<String>] whatever IndonesianStemmer.stem returns
  #   for the copy
  def stem
    copy = dup
    IndonesianStemmer.stem(copy)
  end

  # Hands the receiver itself to IndonesianStemmer.stem, so any in-place
  # change the stemmer makes is visible on this string.
  #
  # @return [String, Array<String>] whatever IndonesianStemmer.stem returns
  #   for the receiver
  def stem!
    target = self
    IndonesianStemmer.stem(target)
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
indonesian_stemmer-0.2.0 lib/indonesian_stemmer.rb
indonesian_stemmer-0.1.1 lib/indonesian_stemmer.rb