Sha256: fc8f13234d3076a1f676eeec532fea7aa78fcab9e4dc911c0ca2296bfbbd9cc0
Contents?: true
Size: 1.22 KB
Versions: 1
Compression:
Stored size: 1.22 KB
Contents
# encoding: UTF-8

class TextNlp
  # Removes stop expressions from a text.
  #
  # The list is assembled from expressions given directly and/or read from
  # text files, one expression per line. Each expression is recorded under its
  # normalized form (String#normalize, defined elsewhere in the gem) so that
  # #transform can test membership with a hash lookup.
  class StopList

    class << self
      # Directory searched for "<name>.txt" files when a list is requested
      # through the :name / :names options.
      attr_accessor :directory
    end

    # Default location: the "stoplists" directory next to this file.
    self.directory = File.join(File.dirname(__FILE__), 'stoplists')

    # Builds a stop list from any combination of the supported sources.
    #
    # @param options [Hash]
    # @option options [Array<String>] :expressions expressions given directly
    # @option options [String] :name  base name of one "<name>.txt" file in
    #   StopList.directory
    # @option options [Array<String>] :names base names of several such files
    # @option options [String] :file  path of one stop list file
    # @option options [Array<String>] :files paths of several stop list files
    # @raise [SystemCallError] (e.g. Errno::ENOENT) when a referenced file
    #   cannot be read
    def initialize(options = {})
      # Work on a private copy: lines read from files are appended below and
      # must not leak into the array owned by the caller.
      expressions = Array(options[:expressions]).dup

      stop_list_paths(options).each do |path|
        # Lines are kept exactly as read (trailing newline included).
        # NOTE(review): presumably String#normalize strips it - confirm.
        File.foreach(path) { |line| expressions << line }
      end

      @cache = {}
      expressions.each { |expression| @cache[expression.normalize] = true }
      @expressions = TextNlp::Expressions.new(expressions)
    end

    # Returns +text+ without its stop expressions.
    #
    # The text is split by TextNlp::Expressions#expressionize (defined
    # elsewhere); every resulting item that is nil or a key of the normalized
    # cache is dropped, and the remaining items are joined with single spaces.
    #
    # @param text [String]
    # @return [String]
    def transform(text)
      kept = @expressions.expressionize(text).reject do |expr|
        expr.nil? || @cache.key?(expr)
      end
      kept.join(' ')
    end

    # @return [Integer] size of the +values+ collection exposed by the
    #   underlying TextNlp::Expressions object
    def size
      @expressions.values.size
    end

    private

    # Lists the files to read, in the order :name, :names, :file, :files.
    # Options that are absent or nil contribute nothing.
    def stop_list_paths(options)
      names = []
      names << options[:name] if options[:name]
      names.concat(Array(options[:names]))

      paths = names.map { |name| File.join(StopList.directory, "#{name}.txt") }
      paths << options[:file] if options[:file]
      paths.concat(Array(options[:files]))
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
text_nlp-0.0.3 | lib/text_nlp/stop_list.rb |