exe/httpspell in httpspell-1.1.0 vs exe/httpspell in httpspell-1.2.0
- old
+ new
@@ -8,11 +8,12 @@
personal_dictionary_path = nil
force_language = nil
tracing = nil
verbose = nil
-limit = nil
+whitelist = nil
+blacklist = []
begin
OptionParser.new do |parser|
parser.banner.prepend <<~BANNER
Spellchecks a website via HTTP.
@@ -26,22 +27,27 @@
parser.on('-l', '--language=LANGUAGE', 'override LANGUAGE of content') do |l|
force_language = l
end
- parser.on('-L', '--limit=EXPRESSION', 'limit recursive retrieval to URLs matching a regular EXPRESSION') do |l|
- limit = Regexp.new(l)
+ parser.on('-w', '--whitelist=EXPRESSION', 'when recursively retrieving URLs, allow only those matching the given regular EXPRESSION') do |w|
+ whitelist ||= []
+ whitelist << Regexp.new(w)
end
parser.on('-t', '--trace', 'enable error tracing') do
tracing = true
end
parser.on('-V', '--verbose', "explain what's happening") do
verbose = true
end
+ parser.on('-b', '--blacklist=EXPRESSION', 'blacklist (ignore) URLs matching the given regular EXPRESSION') do |b|
+ blacklist << Regexp.new(b)
+ end
+
# TODO: --recursive, defaults to false
# TODO: wget has some additional options for recursive behavior that should be reviewed
end.parse!
rescue StandardError
warn "Error - #{$ERROR_INFO}"
@@ -51,35 +57,30 @@
if ARGV.size != 1
warn "Expected exactly one argument, but received #{ARGV.size}."
exit 1
end
-spell_checker = HttpSpell::SpellChecker.new(personal_dictionary_path)
+spell_checker = HttpSpell::SpellChecker.new(personal_dictionary_path, tracing: tracing)
has_unknown_words = false
-begin
- HttpSpell::Spider.new(ARGV.first, limit: limit, tracing: tracing).start do |url, doc|
- lang = force_language || doc.root['lang'] || ENV['LANGUAGE']
+spider_success = HttpSpell::Spider.new(ARGV.first, whitelist: whitelist, blacklist: blacklist, tracing: tracing).start do |url, doc|
+ lang = force_language || doc.root['lang'] || ENV['LANGUAGE']
- # Remove sections that are not to be spellchecked
- doc.css('pre').each(&:unlink)
- doc.css('code').each(&:unlink)
- doc.css('[spellcheck=false]').each(&:unlink)
+ # Remove sections that are not to be spellchecked
+ doc.css('pre').each(&:unlink)
+ doc.css('code').each(&:unlink)
+ doc.css('[spellcheck=false]').each(&:unlink)
- # TODO: Find sections with a lang attribute and handle them separately
- unknown_words = spell_checker.check(doc.to_s, lang)
+ # TODO: Find sections with a lang attribute and handle them separately
+ unknown_words = spell_checker.check(doc.to_s, lang)
- if unknown_words.empty?
- warn "No unknown words (language is #{lang}) at #{url}." if verbose
- else
- warn "#{unknown_words.size} unknown words (language is #{lang}) at #{url}:" if verbose
- puts unknown_words
- has_unknown_words = true
- end
+ if unknown_words.empty?
+ warn "No unknown words (language is #{lang}) at #{url}." if verbose
+ else
+ warn "#{unknown_words.size} unknown words (language is #{lang}) at #{url}:" if verbose
+ puts unknown_words
+ has_unknown_words = true
end
-rescue StandardError
- warn $ERROR_INFO.message
- warn $ERROR_INFO.backtrace if tracing
- exit 2
end
+exit 2 unless spider_success
exit 1 if has_unknown_words