Sha256: 636ced0e39ce677a0b76680f572c1cde5fd4d508a9fe7be38419af56634ff10b

Contents?: true

Size: 1.31 KB

Versions: 1

Compression:

Stored size: 1.31 KB

Contents

module IndonesianStemmer
  # Word-list constants that are populated, at the moment this file is
  # evaluated, from plain-text files in the `irregular_words` directory
  # located next to this file. Each file is expected to hold one word per
  # line.
  module IrregularWords
    # Letters for which a BEGINS_WITH_<letter> constant is defined below.
    SPECIAL_LETTERS = %w( K P N R )

    class << self

      private
        # Reads a word-list file and returns its lines as an Array of Strings.
        #
        # filename - name of a file inside the `irregular_words` directory.
        # chopped  - when true, the first character of every line is dropped
        #            (a blank line then yields nil, because ""[1..-1] is nil).
        #
        # The file is read in binary mode and split on "\n" only, so trailing
        # blank lines are discarded and any "\r" is left in place.
        #
        # Raises the usual Errno::* errors if the file cannot be opened.
        def load_words(filename, chopped = false)
          # Block form guarantees the handle is closed even if the read raises.
          contents = File.open(path(filename), 'rb') { |file| file.read }
          words = contents.split("\n")
          return words unless chopped

          words.map { |word| word[1..-1] }
        end

        # Builds the absolute path of +filename+ inside the `irregular_words`
        # directory that sits beside this source file.
        def path(filename)
          File.join(
            File.expand_path(File.dirname(__FILE__)),
            'irregular_words', filename )
        end
    end

    # Defines BEGINS_WITH_K, BEGINS_WITH_P, BEGINS_WITH_N and BEGINS_WITH_R
    # from k.txt, p.txt, n.txt and r.txt, with each word's first character
    # removed.
    SPECIAL_LETTERS.each do |letter|
      const_set("BEGINS_WITH_#{letter}", load_words("#{letter.downcase}.txt", true))
    end

    # Words listed in akhiran-i.txt, kept whole.
    ENDS_WITH_I = load_words('akhiran-i.txt')

    # Maps a prefix string to the chopped word list associated with it.
    ON_PREFIX_CHARACTERS = {
      'meng' => BEGINS_WITH_K,
      'peng' => BEGINS_WITH_K,
      'mem' => BEGINS_WITH_P,
      'pem' => BEGINS_WITH_P,
    }

    # Maps a trailing character sequence to the word list loaded from the
    # file of the same name.
    ENDS_WITH_COMMON_CHARACTERS = {
      'kah' => load_words('kah.txt'),
      'lah' => load_words('lah.txt'),
      'pun' => load_words('pun.txt'),
      'ku'  => load_words('ku.txt'),
      'mu'  => load_words('mu.txt'),
      'nya' => load_words('nya.txt'),
    }

    # The inline "kan" list (currently just "majikan") followed by every
    # entry of ENDS_WITH_I.
    ENDS_WITH_SUFFIX_CHARACTERS = %w( majikan ) + # ENDS_WITH_KAN
                                  ENDS_WITH_I
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
indonesian_stemmer-0.2.0 lib/indonesian_stemmer/irregular_words.rb