lib/site_health.rb in site_health-0.1.0 vs lib/site_health.rb in site_health-0.2.0
- old
+ new
@@ -1,136 +1,112 @@
-require "spidr"
-require 'w3c_validators'
-require "site_health/version"
+# frozen_string_literal: true
-require "site_health/key_struct"
+require 'logger'
+require 'spidr'
-require 'site_health/journals/css_journal'
-require 'site_health/journals/html_journal'
-require 'site_health/journals/xml_journal'
-require 'site_health/journals/w3c_journal'
+require 'site_health/version'
+require 'site_health/configuration/configuration'
-require "site_health/checkers/css_page"
-require "site_health/checkers/html_page"
-require "site_health/checkers/xml_page"
+require 'site_health/key_struct'
+require 'site_health/url_map'
+require 'site_health/link'
+require 'site_health/checkers/checker'
+require 'site_health/nurse'
+require 'site_health/issues_report'
+
+# Top-level module/namespace
module SiteHealth
- def self.check(site)
- Check.call(site: site)
+ def self.require_optional_dependency(path, gem_name: nil)
+ gem_name ||= path
+ require path
+ rescue LoadError => e
+ message_parts = [
+ e.message,
+ "unable to require file from '#{gem_name}' gem",
+ 'please install it',
+ ]
+ raise(LoadError, message_parts.join(' -- '))
end
- class Check
- def self.call(**args)
- new(**args).call
- end
+ # @param klass [Class] checker class that inherits from Checker
+ # @return [Hash] all registered checkers (see SiteHealth.registered_checkers)
+ def self.register_checker(klass)
+ registered_checkers[klass.name.to_sym] = klass
+ registered_checkers
+ end
- BrokenLinkJournal = KeyStruct.new(:url, :exists_on)
+ # @return [Hash] all registered checkers
+ def self.registered_checkers
+ @checkers ||= {}
+ end
- HTTPCodeJournal = KeyStruct.new(:url, :code)
- class HTTPCodeJournal
- def error?
- code >= 400
- end
+ # @param name [Symbol, String] name of the checker to be loaded
+ # @return [Class, nil] the loaded checker class (should inherit from Checker), or nil if requiring its file did not register it under that name
+ def self.load_checker(name)
+ name_key = name.to_sym
+ registered_checkers.fetch(name_key) do
+ require "site_health/checkers/#{name}"
+ registered_checkers[name_key]
end
+ end
- ChecksJournal = KeyStruct.new(
- :missing_html_title,
- :broken_urls,
- :http_error_urls,
- :html_error_urls,
- :html_warning_urls,
- :xml_error_urls,
- :css_error_urls,
- :css_warning_urls
- )
+ # @param site [String] the site to be checked
+ # @param config [SiteHealth::Configuration] the configuration to use
+ # @yieldparam nurse [SiteHealth::Nurse] the nurse (a.k.a. agent), yielded before crawling starts
+ # @return [Hash] journal data
+ # @see Nurse#journal
+ def self.check(site, config: SiteHealth.config)
+ nurse = Nurse.new(config: config)
+ yield(nurse) if block_given?
- attr_reader :site
-
- def initialize(site:)
- @site = site
+ Spidr.site(site) do |spider|
+ spider.every_failed_url { |url| nurse.check_failed_url(url) }
+ spider.every_page { |page| nurse.check_page(page) }
end
- def call
- url_map = Hash.new { |hash, key| hash[key] = [] }
+ nurse.punch_out!
+ end
- missing_html_title = []
- http_error_urls = []
- html_error_urls = []
- html_warning_urls = []
- xml_error_urls = []
- css_error_urls = []
- css_warning_urls = []
+ # @param urls [Array<String>, String] the URL(s) to be checked
+ # @param config [SiteHealth::Configuration] the configuration to use
+ # @yieldparam nurse [SiteHealth::Nurse] the nurse (a.k.a. agent), yielded before any URL is fetched
+ # @return [Hash] journal data
+ # @see Nurse#journal
+ def self.check_urls(urls, config: SiteHealth.config)
+ nurse = Nurse.new(config: config)
+ yield(nurse) if block_given?
- spider = Spidr.site(site) do |spider|
- spider.every_link do |origin, destination|
- url_map[destination] << origin
- end
+ agent = Spidr::Agent.new
- spider.every_page do |page|
- code_journal = HTTPCodeJournal.new(url: page.url, code: page.code)
- http_error_urls << code_journal if code_journal.error?
+ Array(urls).each do |url|
+ page = agent.get_page(url)
- if page.css?
- result = Checkers::CSSPage.check(page)
- xml_error_urls << result if result.errors?
- end
-
- if page.xml?
- result = Checkers::XMLPage.check(page)
- xml_error_urls << result if result.errors?
- end
-
- if page.html?
- result = Checkers::HTMLPage.check(page)
- missing_html_title << result if result.missing_title?
- html_error_urls << result if result.errors?
- end
- end
+ if page.nil?
+ nurse.check_failed_url(url)
+ next
end
- http_error_urls = map_http_error_urls(http_error_urls, url_map)
- broken_urls = broken_links(spider, url_map) + http_error_urls
-
- ChecksJournal.new(
- missing_html_title: missing_html_title,
- broken_urls: broken_urls,
- http_error_urls: http_error_urls,
- html_error_urls: html_error_urls,
- html_warning_urls: html_warning_urls,
- xml_error_urls: xml_error_urls,
- css_error_urls: css_error_urls,
- css_warning_urls: css_warning_urls
- )
+ nurse.check_page(page)
end
- def validate_css_page(page, errors)
- css_checker = Checkers::CSSPage.new(page)
- result = css_checker.check
- return unless result.errors?
+ nurse.punch_out!
+ end
- result
- end
+ # @see Configuration#logger
+ def self.logger
+ config.logger
+ end
- def map_http_error_urls(urls, url_map)
- urls.map do |failed_url|
- BrokenLinkJournal.new(url: failed_url, exists_on: url_map[failed_url])
- end
- end
+ # @return [Configuration] the current configuration (created on first call)
+ # @yieldparam configuration [Configuration] the current configuration
+ def self.configure
+ @configuration ||= Configuration.new
+ yield(@configuration) if block_given?
+ @configuration
+ end
- # Finds all pages which have broken links:
- def broken_links(spider, url_map)
- # FIXME: spider#failures only returns timeout errors etc and not HTTP error status codes..
- # so we need to have 2 types of "failed" URLs
- spider.failures.map do |failed_url|
- BrokenLinkJournal.new(url: failed_url, exists_on: url_map[failed_url])
- end
- end
-
- # @return [W3CValidators::Results]
- # @raise [W3CValidators::ValidatorUnavailable] the service is offline or returns 400 Bad Request
- # @see https://github.com/w3c-validators/w3c_validators/issues/39 we really want to use #validate_text instead of #validate_uri but due to the linked issue thats not possible
- def validate_html(html_url)
- validator = W3CValidators::NuValidator.new
- validator.validate_uri(html_url)
- end
+ # @return [Configuration] the current configuration
+ def self.config
+ configure
end
end