#!/usr/bin/env ruby
# frozen_string_literal: true

# Command-line entry point: spellchecks a website via HTTP.
#
# Takes exactly one positional argument, which is handed to
# HttpSpell::Spider as the starting point. For every (url, doc) pair the
# spider yields, the document is stripped of sections that should not be
# spellchecked and then passed to HttpSpell::SpellChecker.
#
# Output:
#   stdout - the unknown words of each checked document
#   stderr - diagnostics, errors and (with --verbose) per-URL summaries
#
# Exit status:
#   0 - finished without finding unknown words
#   1 - invalid options, wrong number of arguments, or unknown words found
#   2 - an error was raised while spidering or spellchecking

require 'optparse'
require 'httpspell/spider'
require 'httpspell/spellchecker'
require 'httpspell/version'

personal_dictionary_path = nil
force_language = nil
tracing = nil
verbose = nil
limit = nil

begin
  OptionParser.new do |parser|
    parser.banner.prepend <<~BANNER
      Spellchecks a website via HTTP.
    BANNER
    parser.version = HttpSpell::VERSION

    parser.on('-p', '--personal-dictionary=FILE', 'path to the personal dictionary file') do |p|
      personal_dictionary_path = p
    end

    parser.on('-l', '--language=LANGUAGE', 'override LANGUAGE of content') do |l|
      force_language = l
    end

    parser.on('-L', '--limit=EXPRESSION', 'limit recursive retrieval to URLs matching a regular EXPRESSION') do |l|
      # An invalid EXPRESSION raises RegexpError, which the rescue below reports.
      limit = Regexp.new(l)
    end

    parser.on('-t', '--trace', 'enable error tracing') do
      tracing = true
    end

    parser.on('-V', '--verbose', "explain what's happening") do
      verbose = true
    end

    # TODO: --recursive, defaults to false
    # TODO wget has some additional options for recursive behavior that should be reviewed
  end.parse!
rescue StandardError => e
  # The exception is bound explicitly instead of being read from $ERROR_INFO:
  # that alias only exists once the 'English' library has been required,
  # which this script does not do itself.
  warn "Error - #{e}"
  exit 1
end

# parse! removed the recognized options from ARGV; only the positional
# argument may remain.
if ARGV.size != 1
  warn "Expected exactly one argument, but received #{ARGV.size}."
  exit 1
end

spell_checker = HttpSpell::SpellChecker.new(personal_dictionary_path)
has_unknown_words = false

begin
  HttpSpell::Spider.new(ARGV.first, limit: limit, tracing: tracing).start do |url, doc|
    # Language precedence: --language option, then the lang attribute of the
    # document's root element, then the LANGUAGE environment variable.
    lang = force_language || doc.root['lang'] || ENV['LANGUAGE']

    # Remove sections that are not to be spellchecked
    doc.css('pre').each(&:unlink)
    doc.css('code').each(&:unlink)
    doc.css('[spellcheck=false]').each(&:unlink)

    # TODO: Find sections with a lang attribute and handle them separately
    unknown_words = spell_checker.check(doc.to_s, lang)

    if unknown_words.empty?
      warn "No unknown words (language is #{lang}) at #{url}." if verbose
    else
      warn "#{unknown_words.size} unknown words (language is #{lang}) at #{url}:" if verbose
      puts unknown_words
      # Remember the failure, but keep going through the remaining documents.
      has_unknown_words = true
    end
  end
rescue StandardError => e
  # Bound explicitly for the same reason as above; with an undefined
  # $ERROR_INFO this handler would itself fail on nil.message and hide the
  # original error.
  warn e.message
  warn e.backtrace if tracing
  exit 2
end

exit 1 if has_unknown_words