lib/sastrawi/stemmer/stemmer.rb in sastrawi-0.1.0 vs lib/sastrawi/stemmer/stemmer.rb in sastrawi-0.1.1

- old
+ new

@@ -2,20 +2,27 @@ require 'sastrawi/stemmer/context/visitor/visitor_provider' require 'sastrawi/stemmer/filter/text_normalizer' +## +# Indonesian Stemmer +# Nazief & Adriani, CS Stemmer, ECS Stemmer, Improved ECS + module Sastrawi module Stemmer class Stemmer attr_reader :dictionary, :visitor_provider def initialize(dictionary) @dictionary = dictionary @visitor_provider = Sastrawi::Stemmer::Context::Visitor::VisitorProvider.new end + ## + # Stem a string to its base form + def stem(text) normalized_text = Sastrawi::Stemmer::Filter::TextNormalizer.normalize_text(text) words = normalized_text.split(' ') stems = [] @@ -25,10 +32,13 @@ end stems.join(' ') end + ## + # Stem a word to its base form + def stem_word(word) if plural?(word) stem_plural_word(word) else stem_singular_word(word) @@ -41,10 +51,15 @@ return matches[1].include?('-') if matches return word.include?('-') end + ## + # Stem a plural word to its base form + # Asian, J. (2007) "Effective Techniques for Indonesian Text Retrieval" + # pages 76-77 + def stem_plural_word(word) first_match = /^(.*)-(.*)$/.match(word) return word unless first_match @@ -69,9 +84,12 @@ root_first_word else word end end + + ## + # Stem a singular word to its base form def stem_singular_word(word) context = Sastrawi::Stemmer::Context::Context.new(word, @dictionary, @visitor_provider) context.execute