exe/httpspell in httpspell-1.4.1 vs exe/httpspell in httpspell-1.5.0

- old
+ new

@@ -8,12 +8,12 @@
 personal_dictionary_path = nil
 force_language = nil
 tracing = nil
 verbose = nil
-whitelist = nil
-blacklist = []
+included = nil
+excluded = []

 begin
   OptionParser.new do |parser|
     parser.banner.prepend <<~BANNER
       Spellchecks a website via HTTP.

@@ -27,71 +27,71 @@
     parser.on('-l', '--language=LANGUAGE', 'override LANGUAGE of content') do |l|
       force_language = l
     end

-    parser.on('-w', '--whitelist=EXPRESSION', 'when recursively retrieving URLs, allow only those matching the given regular EXPRESSION') do |w|
-      whitelist ||= []
-      whitelist << Regexp.new(w)
+    parser.on('-i', '--include=EXPRESSION', 'when recursively retrieving URLs, allow only those matching the given regular EXPRESSION') do |w|
+      included ||= []
+      included << Regexp.new(w)
     end

     parser.on('-t', '--trace', 'enable error tracing') do
       tracing = true
     end

     parser.on('-V', '--verbose', "explain what's happening") do
       verbose = true
     end

-    parser.on('-b', '--blacklist=EXPRESSION', 'blacklist (ignore) URLs matching the given regular EXPRESSION') do |b|
-      blacklist << Regexp.new(b)
+    parser.on('-e', '--exclude=EXPRESSION', 'exclude URLs matching the given regular EXPRESSION') do |b|
+      excluded << Regexp.new(b)
     end

     # TODO: --recursive, defaults to false
     # TODO wget has some additional options for recursive behavior that should be reviewed
   end.parse!
 rescue StandardError
-  warn "Error - #{$ERROR_INFO}"
+  warn "Error: #{$ERROR_INFO}"
   exit 1
 end

 if ARGV.size != 1
   warn "Expected exactly one argument, but received #{ARGV.size}."
   exit 1
 end

-def check(doc, lang, personal_dictionary_path, verbose)
+def check(url, doc, lang, personal_dictionary_path, verbose)
   unknown_words = HttpSpell::SpellChecker.new(personal_dictionary_path, verbose:).check(doc, lang)

   if unknown_words.empty?
-    warn 'No unknown words.' if verbose
+    warn "#{url} (lang=#{lang}): No unknown words" if verbose
+    false
   else
-    warn "#{unknown_words.size} unknown words:" if verbose
+    warn "#{url} (lang=#{lang}): #{unknown_words.size} unknown words:" if verbose
     puts unknown_words
     true
   end
 end

 has_unknown_words = false

-spider_success = HttpSpell::Spider.new(ARGV.first, whitelist:, blacklist:, verbose:, tracing:).start do |url, doc|
+spider_success = HttpSpell::Spider.new(ARGV.first, included:, excluded:, verbose:, tracing:).start do |url, doc|
   lang = force_language || doc.root['lang'] || ENV.fetch('LANGUAGE', nil)
-  warn "Checking #{url} as #{lang}" if verbose

   # Remove elements that are not to be spellchecked
   doc.css('pre').each(&:unlink)
   doc.css('code').each(&:unlink)
+  doc.css('iframe').each(&:unlink)
   doc.css('[spellcheck=false]').each(&:unlink)

   # Handle elements with a different lang attribute separately
   doc.css(%([lang]:not([lang="#{lang}"]))).each do |element|
-    warn "Handling #{element.name} with lang #{element['lang']}:" if verbose
-    has_unknown_words |= check(element.to_s, element['lang'], personal_dictionary_path, verbose)
+    has_unknown_words |= check("#{url} => #{element.name} with", element.to_s, element['lang'], personal_dictionary_path, verbose)
     element.unlink
   end

   # Everything else
-  has_unknown_words |= check(doc.to_s, lang, personal_dictionary_path, verbose)
+  has_unknown_words |= check("#{url} => document with", doc.to_s, lang, personal_dictionary_path, verbose)
 end

 exit 2 unless spider_success

 exit 1 if has_unknown_words