# encoding: UTF-8

class TextNlp
  # Removes stop expressions from a text.
  #
  # The stop expressions are gathered from an explicit array and/or from one
  # or more text files that are read line by line. Relies on
  # String#normalize and TextNlp::Expressions, both defined outside this file.
  class StopList
    class << self
      # Directory in which the named stop lists ("<name>.txt") are looked up.
      attr_accessor :directory
    end

    self.directory = File.join(File.dirname(__FILE__), 'stoplists')

    # Builds a stop list.
    #
    # @param options [Hash]
    # @option options [Array<String>] :expressions initial stop expressions
    #   (the given array is copied, never modified)
    # @option options [String] :name  base name of one file in StopList.directory
    # @option options [Enumerable<String>] :names base names of several such files
    # @option options [String] :file  path of one stop list file
    # @option options [Enumerable<String>] :files paths of several stop list files
    # @raise [SystemCallError] when a referenced file cannot be read
    def initialize(options = {})
      @cache = {}
      # Work on a copy: appending file contents directly to the caller's
      # array would leak entries into every other user of that array.
      expressions = Array(options[:expressions]).dup

      source_paths(options).each do |path|
        # Lines are kept exactly as File.foreach yields them (line terminator
        # included). NOTE(review): presumably String#normalize /
        # TextNlp::Expressions take care of stripping -- confirm.
        File.foreach(path) { |line| expressions << line }
      end

      expressions.each { |e| @cache[e.normalize] = true }
      @expressions = TextNlp::Expressions.new(expressions)
    end

    # Splits +text+ into expressions and drops every one found in the stop list.
    #
    # @param text [String]
    # @return [String] the remaining expressions joined by a single space
    def transform(text)
      @expressions.expressionize(text)
                  .reject { |expr| expr.nil? || @cache.key?(expr) }
                  .join(' ')
    end

    # @return [Integer] size of the +values+ collection of the underlying
    #   TextNlp::Expressions object
    def size
      @expressions.values.size
    end

    private

    # Lists the files to load, in the order :name, :names, :file, :files.
    def source_paths(options)
      paths = []
      paths << named_path(options[:name]) if options.key?(:name)
      options[:names].each { |name| paths << named_path(name) } if options.key?(:names)
      paths << options[:file] if options.key?(:file)
      options[:files].each { |file| paths << file } if options.key?(:files)
      paths
    end

    # Path of the bundled stop list called +name+.
    def named_path(name)
      File.join(StopList.directory, "#{name}.txt")
    end
  end
end