lib/autosuggest.rb in autosuggest-0.1.0 vs lib/autosuggest.rb in autosuggest-0.1.1

- old
+ new

@@ -1,16 +1,23 @@ -require "autosuggest/version" -require "lingua/stemmer" +# stdlib +require "set" require "yaml" # for obscenity + +# dependencies +require "lingua/stemmer" require "obscenity" +# modules +require "autosuggest/version" + class Autosuggest def initialize(top_queries) @top_queries = top_queries @concepts = {} @words = Set.new @non_duplicates = Set.new + @blocked_words = Set.new @blacklisted_words = Set.new @preferred_queries = {} @profane_words = Set.new(Obscenity::Base.blacklist) end @@ -40,11 +47,18 @@ pairs.each do |pair| @non_duplicates << pair.map(&:downcase).sort end end + def block_words(words) + words.each do |word| + @blocked_words << word.downcase + end + end + def blacklist_words(words) + warn "[autosuggest] blacklist_words is deprecated. Use block_words instead." words.each do |word| @blacklisted_words << word.downcase end end @@ -97,33 +111,36 @@ end # exclude misspellings that are not brands misspelling = @words.any? && misspellings?(query) - profane = blacklisted?(query, @profane_words) + profane = blocked?(query, @profane_words) + blocked = blocked?(query, @blocked_words) + blacklisted = blocked?(query, @blacklisted_words) - blacklisted = blacklisted?(query, @blacklisted_words) - notes = [] notes << "duplicate of #{duplicate}" if duplicate notes.concat(concepts) notes << "misspelling" if misspelling notes << "profane" if profane + notes << "blocked" if blocked notes << "blacklisted" if blacklisted notes << "originally #{original_query}" if original_query - { + result = { query: query, original_query: original_query, score: count, duplicate: duplicate, concepts: concepts, misspelling: misspelling, profane: profane, - blacklisted: blacklisted, - notes: notes + blocked: blocked } + result[:blacklisted] = blacklisted if @blacklisted_words.any? + result[:notes] = notes + result end end def pretty_suggestions str = "%-30s %5s %s\n" % %w(Query Score Notes) @@ -142,13 +159,13 @@ end end true end - def blacklisted?(query, blacklisted_words) + def blocked?(query, blocked_words) recurse(tokenize(query)).each do |terms| - return true if terms.any? { |t| blacklisted_words.include?(t) } + return true if terms.any? { |t| blocked_words.include?(t) } end false end def recurse(words) @@ -171,11 +188,11 @@ def tokenize(str) str.to_s.downcase.split(" ") end - # from http://blog.lojic.com/2008/09/04/how-to-write-a-spelling-corrector-in-ruby/ + # from https://blog.lojic.com/2008/09/04/how-to-write-a-spelling-corrector-in-ruby/ LETTERS = ("a".."z").to_a.join + "'" def edits(word) n = word.length deletion = (0...n).collect { |i| word[0...i] + word[i + 1..-1] } transposition = (0...n - 1).collect { |i| word[0...i] + word[i + 1, 1] + word[i, 1] + word[i + 2..-1] } @@ -186,17 +203,7 @@ deletion + transposition + alteration + insertion end def normalize_query(query) tokenize(query.to_s.gsub("&", "and")).map { |q| Lingua.stemmer(q) }.sort.join - end - - # TODO remove ActiveSupport dependency - - def singularize(str) - str.singularize - end - - def pluralize(str) - str.pluralize end end