exe/httpspell in httpspell-1.4.1 vs exe/httpspell in httpspell-1.5.0
- old
+ new
@@ -8,12 +8,12 @@
personal_dictionary_path = nil
force_language = nil
tracing = nil
verbose = nil
-whitelist = nil
-blacklist = []
+included = nil
+excluded = []
begin
OptionParser.new do |parser|
parser.banner.prepend <<~BANNER
Spellchecks a website via HTTP.
@@ -27,71 +27,71 @@
parser.on('-l', '--language=LANGUAGE', 'override LANGUAGE of content') do |l|
force_language = l
end
- parser.on('-w', '--whitelist=EXPRESSION', 'when recursively retrieving URLs, allow only those matching the given regular EXPRESSION') do |w|
- whitelist ||= []
- whitelist << Regexp.new(w)
+ parser.on('-i', '--include=EXPRESSION', 'when recursively retrieving URLs, allow only those matching the given regular EXPRESSION') do |w|
+ included ||= []
+ included << Regexp.new(w)
end
parser.on('-t', '--trace', 'enable error tracing') do
tracing = true
end
parser.on('-V', '--verbose', "explain what's happening") do
verbose = true
end
- parser.on('-b', '--blacklist=EXPRESSION', 'blacklist (ignore) URLs matching the given regular EXPRESSION') do |b|
- blacklist << Regexp.new(b)
+ parser.on('-e', '--exclude=EXPRESSION', 'exclude URLs matching the given regular EXPRESSION') do |b|
+ excluded << Regexp.new(b)
end
# TODO: --recursive, defaults to false
# TODO: wget has some additional options for recursive behavior that should be reviewed
end.parse!
rescue StandardError
- warn "Error - #{$ERROR_INFO}"
+ warn "Error: #{$ERROR_INFO}"
exit 1
end
if ARGV.size != 1
warn "Expected exactly one argument, but received #{ARGV.size}."
exit 1
end
-def check(doc, lang, personal_dictionary_path, verbose)
+def check(url, doc, lang, personal_dictionary_path, verbose)
unknown_words = HttpSpell::SpellChecker.new(personal_dictionary_path, verbose:).check(doc, lang)
if unknown_words.empty?
- warn 'No unknown words.' if verbose
+ warn "#{url} (lang=#{lang}): No unknown words" if verbose
+ false
else
- warn "#{unknown_words.size} unknown words:" if verbose
+ warn "#{url} (lang=#{lang}): #{unknown_words.size} unknown words:" if verbose
puts unknown_words
true
end
end
has_unknown_words = false
-spider_success = HttpSpell::Spider.new(ARGV.first, whitelist:, blacklist:, verbose:, tracing:).start do |url, doc|
+spider_success = HttpSpell::Spider.new(ARGV.first, included:, excluded:, verbose:, tracing:).start do |url, doc|
lang = force_language || doc.root['lang'] || ENV.fetch('LANGUAGE', nil)
- warn "Checking #{url} as #{lang}" if verbose
# Remove elements that are not to be spellchecked
doc.css('pre').each(&:unlink)
doc.css('code').each(&:unlink)
+ doc.css('iframe').each(&:unlink)
doc.css('[spellcheck=false]').each(&:unlink)
# Handle elements with a different lang attribute separately
doc.css(%([lang]:not([lang="#{lang}"]))).each do |element|
- warn "Handling #{element.name} with lang #{element['lang']}:" if verbose
- has_unknown_words |= check(element.to_s, element['lang'], personal_dictionary_path, verbose)
+ has_unknown_words |= check("#{url} => #{element.name} with", element.to_s, element['lang'], personal_dictionary_path, verbose)
element.unlink
end
# Everything else
- has_unknown_words |= check(doc.to_s, lang, personal_dictionary_path, verbose)
+ has_unknown_words |= check("#{url} => document with", doc.to_s, lang, personal_dictionary_path, verbose)
end
exit 2 unless spider_success
exit 1 if has_unknown_words