Sha256: 81e86999bbeb0cd61a4cbb71bfb114833266935fd3cdc912d7bbd4d4e91d5db6

Contents?: true

Size: 686 Bytes

Versions: 3

Compression:

Stored size: 686 Bytes

Contents

module Classifier

  # Lazily loads and caches per-language stop word ("skip word") lists.
  #
  # Lists are read from files located in a +stopwords+ directory next to
  # this source file; the file name is the language name itself.
  module SkipWords

    # Cache of already resolved word lists, keyed by language name (String).
    # Intentionally left mutable: it is filled on demand by +SkipWords.for+.
    SKIP_WORDS = {}

    # Returns the list of skip words for +language+.
    #
    # language - language name as a String or Symbol (anything responding to
    #            +to_s+); it is used verbatim as the stop word file name.
    #
    # Returns an Array of Strings. An empty Array is returned (and cached)
    # when no stop word file exists for the language.
    def self.for(language)
      # Normalise so that :en and 'en' share one cache entry and Symbols do
      # not blow up inside File.join.
      key = language.to_s
      # key? (rather than ||=) keeps honouring entries that were seeded
      # directly into SKIP_WORDS, whatever their value.
      SKIP_WORDS[key] = load_stopwords(key) || [] unless SKIP_WORDS.key?(key)
      SKIP_WORDS[key]
    end

    # Internal helper: reads the stop word file for +language+.
    #
    # Everything from a '#' to the end of a line is treated as a comment and
    # dropped; surrounding whitespace is stripped and blank lines are skipped.
    #
    # NOTE: a visibility keyword such as +protected+ does not apply to
    # methods defined with +def self.+, so this method is (and always was)
    # publicly callable; it is kept that way for backward compatibility.
    #
    # NOTE(review): +language+ is joined into the path unchecked, so callers
    # should only pass trusted language names.
    #
    # Returns an Array of Strings, or nil when there is no regular file for
    # the language.
    def self.load_stopwords(language)
      lang_file = File.join(File.dirname(__FILE__), 'stopwords', language.to_s)
      # File.file? (not File.exist?) so that a directory - e.g. the stopwords
      # directory itself for an empty language name - is never opened.
      return unless File.file?(lang_file)

      File.open(lang_file, 'r:utf-8') do |f|
        f.each_line.map { |line| line.gsub(/#.*/, '').strip }.reject(&:empty?)
      end
    end
  end
end

Version data entries

3 entries across 3 versions & 1 rubygems

Version Path
luisparravicini-classifier-1.3.9 lib/classifier/stopwords.rb
luisparravicini-classifier-1.3.8 lib/classifier/stopwords.rb
luisparravicini-classifier-1.3.7 lib/classifier/stopwords.rb