Sha256: fc8f13234d3076a1f676eeec532fea7aa78fcab9e4dc911c0ca2296bfbbd9cc0

Contents?: true

Size: 1.22 KB

Versions: 1

Compression:

Stored size: 1.22 KB

Contents

# encoding: UTF-8

class TextNlp
  # Filters stop expressions out of a text.
  #
  # Stop expressions come from an inline list and/or from stop-list files
  # (one expression per line). They are indexed by their normalized form and
  # handed to TextNlp::Expressions, which is also used to split input text
  # into expressions in #transform.
  class StopList

    class << self
      # Directory in which named stop lists ("<name>.txt") are looked up when
      # the :name / :names options are used.
      attr_accessor :directory
    end

    # By default, named stop lists live in the "stoplists" directory located
    # next to this source file.
    self.directory = File.join(File.dirname(__FILE__), 'stoplists')

    # Builds a stop list from any combination of sources.
    #
    # options - Hash, every key optional:
    #           :expressions - Array of expressions given inline. The array is
    #                          copied; the caller's object is never modified.
    #           :name        - name of one list stored in StopList.directory.
    #           :names       - one name or an Array of names of such lists.
    #           :file        - path of one stop-list file.
    #           :files       - one path or an Array of paths.
    #
    # Sources are read in the order listed above. Errors raised by
    # File.foreach (for instance when a file does not exist) propagate to the
    # caller.
    def initialize(options = {})
      @cache = {}
      # Copy the inline list so that appending file lines below cannot leak
      # into the array owned by the caller.
      expressions = Array(options[:expressions]).dup

      paths = []
      paths << named_path(options[:name]) if options.key?(:name)
      Array(options[:names]).each { |name| paths << named_path(name) } if options.key?(:names)
      paths << options[:file] if options.key?(:file)
      paths.concat(Array(options[:files])) if options.key?(:files)

      # NOTE(review): lines are kept exactly as read, line terminator included;
      # presumably String#normalize / TextNlp::Expressions deal with it --
      # confirm against their definitions, which are not in this file.
      paths.each do |path|
        File.foreach(path) { |line| expressions << line }
      end

      # Index by normalized form for constant-time lookups in #transform.
      expressions.each { |expression| @cache[expression.normalize] = true }
      @expressions = TextNlp::Expressions.new(expressions)
    end

    # Returns text with every stop expression removed: the text is split into
    # expressions by TextNlp::Expressions#expressionize, the ones present in
    # the stop list are dropped and the remaining ones are joined with a
    # single space.
    def transform(text)
      kept = @expressions.expressionize(text).reject do |expr|
        expr.nil? || @cache.key?(expr)
      end
      kept.join(' ')
    end

    # Returns the number of values held by the underlying
    # TextNlp::Expressions object.
    def size
      @expressions.values.size
    end

    private

    # Path of the stop-list file called +name+ inside StopList.directory.
    def named_path(name)
      File.join(StopList.directory, "#{name}.txt")
    end

  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
text_nlp-0.0.3 lib/text_nlp/stop_list.rb