Sha256: f35c8d1b8e5616bed6b6c1f77406a60d021ad40aa55465444c7b716cf0174273
Contents?: true
Size: 1.4 KB
Versions: 1
Compression:
Stored size: 1.4 KB
Contents
require 'wordlist/exceptions' module Wordlist class Lexer # # Stop words for various languages. # # @api semipublic # # @since 1.0.0 # module StopWords # The directory containing the stop words `.txt` files. DIRECTORY = ::File.expand_path(::File.join(__dir__,'..','..','..','data','stop_words')) # # The path to the stop words `.txt` file. # # @param [Symbol] lang # The language to load. # # @return [String] # def self.path_for(lang) ::File.join(DIRECTORY,"#{lang}.txt") end # # Reads the stop words. # # @param [Symbol] lang # The language to load. # # @return [Array<String>] # # @raise [UnsupportedLanguage] # def self.read(lang) path = path_for(lang) unless ::File.file?(path) raise(UnsupportedLanguage,"unsupported language: #{lang}") end lines = ::File.readlines(path) lines.each(&:chomp!) lines end @stop_words = {} @mutex = Mutex.new # # Lazy loads the stop words for the given language. # # @param [Symbol] lang # The language to load. # # @return [Array<String>] # def self.[](lang) @mutex.synchronize do @stop_words[lang] ||= read(lang) end end end end end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
wordlist-1.0.0 | lib/wordlist/lexer/stop_words.rb |