Sha256: 039cdb9d9efdca9d2e4185f3f41b14c52f9d64418b2cf10598f78b47a919ffbb
Contents?: true
Size: 608 Bytes
Versions: 1
Compression:
Stored size: 608 Bytes
Contents
begin
  require 'iconv'
rescue LoadError
  # Iconv was removed from the standard library in Ruby 2.0. When it is not
  # available, String#scrub (Ruby 2.1+) is used instead; see Words#valid_utf8.
end

module Lunar
  # @private Internally used to determine the words given some str.
  #
  # The input is split on whitespace, every token is reduced to lowercase
  # ASCII letters, digits, `-` and `_`, and tokens that end up empty are
  # dropped.
  #
  # @example
  #   Words.new("the quick brown", false) == %w(the quick brown)
  class Words < Array
    # Tokens are separated by one or more whitespace characters.
    SEPARATOR = /\s+/

    # @param str [#to_s] the text to break into words; nil is treated as "".
    # @param stopwords [Boolean] when true (the default), every word for
    #   which `Stopwords.include?` returns true is removed. `Stopwords` is
    #   resolved from the enclosing namespace and is not defined in this file.
    def initialize(str, stopwords = true)
      # Invalid byte sequences are removed *before* splitting: a regexp split
      # on an invalidly encoded String raises ArgumentError on Ruby 1.9+.
      words = valid_utf8(str.to_s).split(SEPARATOR).
        map    { |w| sanitize(w) }.
        # Reject after sanitizing so tokens made only of stripped characters
        # (e.g. "!!!") do not survive as empty strings.
        reject { |w| w.empty? }

      words.reject! { |w| Stopwords.include?(w) } if stopwords

      super(words)
    end

  private

    # Returns a copy of `str` without invalid UTF-8 byte sequences.
    #
    # Uses String#scrub when the running Ruby provides it, falls back to
    # Iconv when that library could be loaded, and otherwise returns the
    # string unchanged.
    def valid_utf8(str)
      if str.respond_to?(:scrub)
        str.scrub('')
      elsif defined?(::Iconv)
        ::Iconv.iconv('UTF-8//IGNORE', 'UTF-8', str)[0].to_s
      else
        str
      end
    end

    # Normalizes a single token: drops invalid bytes, removes every character
    # other than ASCII letters, digits, `-` and `_`, then lowercases the rest.
    def sanitize(str)
      valid_utf8(str).gsub(/[^a-zA-Z0-9\-_]/, '').downcase
    end
  end
end
Version data entries
1 entry across 1 version & 1 rubygem
Version | Path |
---|---|
lunar-0.5.5 | lib/lunar/words.rb |