Sha256: 9c31ce3e71d680d68055f4248c3e60f5bb411648112dbea2e0a9dbd82122c879
Contents?: true
Size: 1.39 KB
Versions: 22
Compression:
Stored size: 1.39 KB
Contents
module DataCatalog

  # Stateless helpers that turn free text into a list of search keywords:
  # strings are lower-cased, split on whitespace, stripped of a little
  # punctuation, de-duplicated and filtered against STOP_WORDS.
  class Search

    # Punctuation characters that are replaced by a space before splitting.
    REMOVE = %r([!,;])

    # Words that are dropped from keyword lists by {unstop}. Besides common
    # English function words the list contains 'data' and 'set'.
    STOP_WORDS = %w(
      a
      about
      an
      and
      are
      as
      at
      be
      by
      data
      for
      from
      how
      in
      is
      it
      of
      on
      or
      set
      that
      the
      this
      to
      was
      what
      when
      where
      who
      will
      with
    ).freeze

    # Returns an array of strings, tokenized with stopwords removed.
    #
    # @param [<String>] array
    #   An array of strings; nil is treated as an empty array.
    #
    # @return [<String>]
    #   Unique lower-case tokens, in order of first appearance, that are
    #   not listed in STOP_WORDS.
    def self.process(array)
      unstop(tokenize(array))
    end

    # Tokenize an array of strings.
    #
    # @param [<String>] array
    #   An array of strings; nil is treated as an empty array and nil
    #   elements contribute no tokens.
    #
    # @return [<String>]
    #   Unique tokens, in order of first appearance.
    def self.tokenize(array)
      # Array() maps nil to [] so a missing field list yields no tokens
      # instead of raising NoMethodError, mirroring how tokens treats nil.
      Array(array).map { |string| tokens(string) }.flatten.uniq
    end

    # Tokenize a string, removing extra characters too.
    #
    # @param [String] s
    #   The text to split; nil or false yields an empty array.
    #
    # @return [<String>]
    #   Lower-case tokens in their original order (duplicates are kept).
    def self.tokens(s)
      return [] unless s

      # The space appended to the input lets the '\. ' pattern also strip a
      # period that ends the string; periods inside a token (for example
      # "v1.2") are left alone.
      "#{s} ".downcase.
        gsub(REMOVE, ' ').
        gsub(%r(\. ), ' ').
        split(' ')
    end

    # Remove stopwords from an array of strings.
    #
    # @param [<String>] array
    #   An array of strings; nil is treated as an empty array.
    #
    # @return [<String>]
    #   A new array holding the elements of +array+ that are not in
    #   STOP_WORDS.
    def self.unstop(array)
      Array(array) - STOP_WORDS
    end

  end

end
Version data entries
22 entries across 22 versions & 1 rubygems
Version | Path |
---|---|
sinatra_resource-0.4.1 | examples/datacatalog/model_helpers/search.rb |
sinatra_resource-0.4.0 | examples/datacatalog/model_helpers/search.rb |