lib/autosuggest.rb in autosuggest-0.1.0 vs lib/autosuggest.rb in autosuggest-0.1.1
- old
+ new
@@ -1,16 +1,23 @@
-require "autosuggest/version"
-require "lingua/stemmer"
+# stdlib
+require "set"
require "yaml" # for obscenity
+
+# dependencies
+require "lingua/stemmer"
require "obscenity"
+# modules
+require "autosuggest/version"
+
class Autosuggest
# Builds a suggester around top_queries, which is stored untouched.
# All filter state starts empty (concepts, known words, non-duplicate pairs,
# blocked words, blacklisted words, preferred queries); only the profanity
# set is pre-populated, from Obscenity::Base.blacklist.
def initialize(top_queries)
@top_queries = top_queries
@concepts = {}
@words = Set.new
@non_duplicates = Set.new
+ @blocked_words = Set.new
@blacklisted_words = Set.new
@preferred_queries = {}
@profane_words = Set.new(Obscenity::Base.blacklist)
end
@@ -40,11 +47,18 @@
pairs.each do |pair|
@non_duplicates << pair.map(&:downcase).sort
end
end
# Registers words to block: each entry of words is downcased and added to
# the @blocked_words set. words must respond to each, and every entry must
# respond to downcase.
+ def block_words(words)
+ words.each do |word|
+ @blocked_words << word.downcase
+ end
+ end
+
# Deprecated in favor of block_words: prints a deprecation warning on every
# call. It still works, adding each downcased word to @blacklisted_words,
# which is a separate set from @blocked_words.
def blacklist_words(words)
+ warn "[autosuggest] blacklist_words is deprecated. Use block_words instead."
words.each do |word|
@blacklisted_words << word.downcase
end
end
@@ -97,33 +111,36 @@
end
# exclude misspellings that are not brands
misspelling = @words.any? && misspellings?(query)
- profane = blacklisted?(query, @profane_words)
+ profane = blocked?(query, @profane_words)
+ blocked = blocked?(query, @blocked_words)
+ blacklisted = blocked?(query, @blacklisted_words)
- blacklisted = blacklisted?(query, @blacklisted_words)
-
notes = []
notes << "duplicate of #{duplicate}" if duplicate
notes.concat(concepts)
notes << "misspelling" if misspelling
notes << "profane" if profane
+ notes << "blocked" if blocked
notes << "blacklisted" if blacklisted
notes << "originally #{original_query}" if original_query
- {
+ result = {
query: query,
original_query: original_query,
score: count,
duplicate: duplicate,
concepts: concepts,
misspelling: misspelling,
profane: profane,
- blacklisted: blacklisted,
- notes: notes
+ blocked: blocked
}
+ result[:blacklisted] = blacklisted if @blacklisted_words.any?
+ result[:notes] = notes
+ result
end
end
def pretty_suggestions
str = "%-30s %5s %s\n" % %w(Query Score Notes)
@@ -142,13 +159,13 @@
end
end
true
end
# Returns true as soon as any term, in any term list yielded by
# recurse(tokenize(query)), is a member of the given word collection;
# returns false when none match. The collection only needs include?.
# NOTE(review): recurse is defined outside this excerpt - presumably it
# expands the tokens into candidate term groups; confirm.
- def blacklisted?(query, blacklisted_words)
+ def blocked?(query, blocked_words)
recurse(tokenize(query)).each do |terms|
- return true if terms.any? { |t| blacklisted_words.include?(t) }
+ return true if terms.any? { |t| blocked_words.include?(t) }
end
false
end
def recurse(words)
@@ -171,11 +188,11 @@
# Lowercases the string form of str and breaks it into
# whitespace-separated tokens. nil yields an empty array.
def tokenize(str)
text = str.to_s
text.downcase.split(" ")
end
- # from http://blog.lojic.com/2008/09/04/how-to-write-a-spelling-corrector-in-ruby/
+ # from https://blog.lojic.com/2008/09/04/how-to-write-a-spelling-corrector-in-ruby/
# Lowercase a-z followed by an apostrophe, as a single string.
# Frozen so this shared constant cannot be mutated in place by accident.
LETTERS = (("a".."z").to_a.join + "'").freeze
def edits(word)
n = word.length
deletion = (0...n).collect { |i| word[0...i] + word[i + 1..-1] }
transposition = (0...n - 1).collect { |i| word[0...i] + word[i + 1, 1] + word[i, 1] + word[i + 2..-1] }
@@ -186,17 +203,7 @@
deletion + transposition + alteration + insertion
end
# Builds a canonical key for a query: "&" is replaced with "and", the text
# is tokenized (downcased and split on whitespace), each token is stemmed
# with Lingua.stemmer, and the stems are sorted and joined with no separator.
# Word order and inflection therefore do not affect the result.
def normalize_query(query)
tokenize(query.to_s.gsub("&", "and")).map { |q| Lingua.stemmer(q) }.sort.join
- end
-
- # TODO remove ActiveSupport dependency
-
- def singularize(str)
- str.singularize
- end
-
- def pluralize(str)
- str.pluralize
end
end