lib/site_health.rb in site_health-0.1.0 vs lib/site_health.rb in site_health-0.2.0

- old
+ new

@@ -1,136 +1,112 @@
-require "spidr"
-require 'w3c_validators'
-require "site_health/version"
+# frozen_string_literal: true
-require "site_health/key_struct"
+require 'logger'
+require 'spidr'
-require 'site_health/journals/css_journal'
-require 'site_health/journals/html_journal'
-require 'site_health/journals/xml_journal'
-require 'site_health/journals/w3c_journal'
+require 'site_health/version'
+require 'site_health/configuration/configuration'
-require "site_health/checkers/css_page"
-require "site_health/checkers/html_page"
-require "site_health/checkers/xml_page"
+require 'site_health/key_struct'
+require 'site_health/url_map'
+require 'site_health/link'
+require 'site_health/checkers/checker'
+require 'site_health/nurse'
+require 'site_health/issues_report'
+
+# Top-level module/namespace
 module SiteHealth
-  def self.check(site)
-    Check.call(site: site)
+  def self.require_optional_dependency(path, gem_name: nil)
+    gem_name ||= path
+    require path
+  rescue LoadError => e
+    message_parts = [
+      e.message,
+      "unable to require file from '#{gem_name}' gem",
+      'please install it',
+    ]
+    raise(LoadError, message_parts.join(' -- '))
   end
-  class Check
-    def self.call(**args)
-      new(**args).call
-    end
+  # @param [Checker] klass that inherits from Checker
+  # @return [see SiteHealth#registered_checkers]
+  def self.register_checker(klass)
+    registered_checkers[klass.name.to_sym] = klass
+    registered_checkers
+  end
-    BrokenLinkJournal = KeyStruct.new(:url, :exists_on)
+  # @return [Hash] all registered checkers
+  def self.registered_checkers
+    @checkers ||= {}
+  end
-    HTTPCodeJournal = KeyStruct.new(:url, :code)
-    class HTTPCodeJournal
-      def error?
-        code >= 400
-      end
+  # @param [Symbol, String] name of the checker to be loaded
+  # @return [Checker] loaded class that should inherits from Checker
+  def self.load_checker(name)
+    name_key = name.to_sym
+    registered_checkers.fetch(name_key) do
+      require "site_health/checkers/#{name}"
+      registered_checkers[name_key]
     end
+  end
-    ChecksJournal = KeyStruct.new(
-      :missing_html_title,
-      :broken_urls,
-      :http_error_urls,
-      :html_error_urls,
-      :html_warning_urls,
-      :xml_error_urls,
-      :css_error_urls,
-      :css_warning_urls
-    )
+  # @param [String] site to be checked
+  # @param config [SiteHealth::Configuration] the configuration to use
+  # @yieldparam [SiteHealth::Nurse] nurse (a.k.a agent)
+  # @return [Hash] journal data
+  # @see Nurse#journal
+  def self.check(site, config: SiteHealth.config)
+    nurse = Nurse.new(config: config)
+    yield(nurse) if block_given?
-    attr_reader :site
-
-    def initialize(site:)
-      @site = site
+    Spidr.site(site) do |spider|
+      spider.every_failed_url { |url| nurse.check_failed_url(url) }
+      spider.every_page { |page| nurse.check_page(page) }
     end
-    def call
-      url_map = Hash.new { |hash, key| hash[key] = [] }
+    nurse.punch_out!
+  end
-      missing_html_title = []
-      http_error_urls = []
-      html_error_urls = []
-      html_warning_urls = []
-      xml_error_urls = []
-      css_error_urls = []
-      css_warning_urls = []
+  # @param [Array<String>, String] urls to be checked
+  # @param config [SiteHealth::Configuration] the configuration to use
+  # @yieldparam [SiteHealth::Nurse] nurse (a.k.a agent)
+  # @return [Hash] journal data
+  # @see Nurse#journal
+  def self.check_urls(urls, config: SiteHealth.config)
+    nurse = Nurse.new(config: config)
+    yield(nurse) if block_given?
-      spider = Spidr.site(site) do |spider|
-        spider.every_link do |origin, destination|
-          url_map[destination] << origin
-        end
+    agent = Spidr::Agent.new
-        spider.every_page do |page|
-          code_journal = HTTPCodeJournal.new(url: page.url, code: page.code)
-          http_error_urls << code_journal if code_journal.error?
+    Array(urls).each do |url|
+      page = agent.get_page(url)
-          if page.css?
-            result = Checkers::CSSPage.check(page)
-            xml_error_urls << result if result.errors?
-          end
-
-          if page.xml?
-            result = Checkers::XMLPage.check(page)
-            xml_error_urls << result if result.errors?
-          end
-
-          if page.html?
-            result = Checkers::HTMLPage.check(page)
-            missing_html_title << result if result.missing_title?
-            html_error_urls << result if result.errors?
-          end
-        end
+      if page.nil?
+        nurse.check_failed_url(url)
+        next
       end
-      http_error_urls = map_http_error_urls(http_error_urls, url_map)
-      broken_urls = broken_links(spider, url_map) + http_error_urls
-
-      ChecksJournal.new(
-        missing_html_title: missing_html_title,
-        broken_urls: broken_urls,
-        http_error_urls: http_error_urls,
-        html_error_urls: html_error_urls,
-        html_warning_urls: html_warning_urls,
-        xml_error_urls: xml_error_urls,
-        css_error_urls: css_error_urls,
-        css_warning_urls: css_warning_urls
-      )
+      nurse.check_page(page)
     end
-    def validate_css_page(page, errors)
-      css_checker = Checkers::CSSPage.new(page)
-      result = css_checker.check
-      return unless result.errors?
+    nurse.punch_out!
+  end
-      result
-    end
+  # @see Configuration#logger
+  def self.logger
+    config.logger
+  end
-    def map_http_error_urls(urls, url_map)
-      urls.map do |failed_url|
-        BrokenLinkJournal.new(url: failed_url, exists_on: url_map[failed_url])
-      end
-    end
+  # @return [Configuration] the current configuration
+  # @yieldparam [Configuration] the current configuration
+  def self.configure
+    @configuration ||= Configuration.new
+    yield(@configuration) if block_given?
+    @configuration
+  end
-    # Finds all pages which have broken links:
-    def broken_links(spider, url_map)
-      # FIXME: spider#failures only returns timeout errors etc and not HTTP error status codes..
-      # so we need to have 2 types of "failed" URLs
-      spider.failures.map do |failed_url|
-        BrokenLinkJournal.new(url: failed_url, exists_on: url_map[failed_url])
-      end
-    end
-
-    # @return [W3CValidators::Results]
-    # @raise [W3CValidators::ValidatorUnavailable] the service is offline or returns 400 Bad Request
-    # @see https://github.com/w3c-validators/w3c_validators/issues/39 we really want to use #validate_text instead of #validate_uri but due to the linked issue thats not possible
-    def validate_html(html_url)
-      validator = W3CValidators::NuValidator.new
-      validator.validate_uri(html_url)
-    end
+  # @return [Configuration] the current configuration
+  def self.config
+    configure
   end
 end