httpspell in httpspell-1.1.0

- old
+ new

@@ -2,24 +2,46 @@
 # frozen_string_literal: true
 
 require 'optparse'
 require 'httpspell/spider'
 require 'httpspell/spellchecker'
+require 'httpspell/version'
 
 personal_dictionary_path = nil
+force_language = nil
+tracing = nil
+verbose = nil
+limit = nil
 
 begin
   OptionParser.new do |parser|
     parser.banner.prepend <<~BANNER
       Spellchecks a website via HTTP.
 
     BANNER
+    parser.version = HttpSpell::VERSION
 
     parser.on('-p', '--personal-dictionary=FILE', 'path to the personal dictionary file') do |p|
       personal_dictionary_path = p
     end
 
+    parser.on('-l', '--language=LANGUAGE', 'override LANGUAGE of content') do |l|
+      force_language = l
+    end
+
+    parser.on('-L', '--limit=EXPRESSION', 'limit recursive retrieval to URLs matching a regular EXPRESSION') do |l|
+      limit = Regexp.new(l)
+    end
+
+    parser.on('-t', '--trace', 'enable error tracing') do
+      tracing = true
+    end
+
+    parser.on('-V', '--verbose', "explain what's happening") do
+      verbose = true
+    end
+
     # TODO: --recursive, defaults to false
     # TODO wget has some additional options for recursive behavior that should be reviewed
   end.parse!
 rescue StandardError
   warn "Error - #{$ERROR_INFO}"
@@ -30,22 +52,34 @@
   warn "Expected exactly one argument, but received #{ARGV.size}."
   exit 1
 end
 
 spell_checker = HttpSpell::SpellChecker.new(personal_dictionary_path)
+has_unknown_words = false # set once any page yields unknown words; decides the final exit status
 
-HttpSpell::Spider.new(ARGV.first).start do |url, doc|
-  lang = doc.root['lang'] || 'de-DE'
+begin
+  HttpSpell::Spider.new(ARGV.first, limit: limit, tracing: tracing).start do |url, doc|
+    lang = force_language || doc.root['lang'] || ENV['LANGUAGE'] # precedence: --language, then the document root's lang, then $LANGUAGE
 
-  # Remove sections that are not to be spellchecked
-  doc.css('pre').each(&:unlink)
-  doc.css('code').each(&:unlink)
-  doc.css('[spellcheck=false]').each(&:unlink)
+    # Remove sections that are not to be spellchecked
+    doc.css('pre').each(&:unlink)
+    doc.css('code').each(&:unlink)
+    doc.css('[spellcheck=false]').each(&:unlink)
 
-  # TODO: Find sections with a lang attribute and handle them separately
-  unknown_words = spell_checker.check(doc.to_s, lang)
+    # TODO: Find sections with a lang attribute and handle them separately
+    unknown_words = spell_checker.check(doc.to_s, lang)
 
-  unless unknown_words.empty?
-    warn "#{unknown_words.size} unknown words at #{url}:"
-    puts unknown_words
+    if unknown_words.empty?
+      warn "No unknown words (language is #{lang}) at #{url}." if verbose
+    else
+      warn "#{unknown_words.size} unknown words (language is #{lang}) at #{url}:" if verbose
+      puts unknown_words # the words go to stdout; diagnostics above go to stderr via warn
+      has_unknown_words = true
+    end
   end
+rescue StandardError => e # bound explicitly: $ERROR_INFO is nil unless 'English' has been required somewhere
+  warn e.message
+  warn e.backtrace if tracing
+  exit 2 # status 2 reports a failure while crawling/checking; status 1 is used below for unknown words
 end
+
+exit 1 if has_unknown_words