Sha256: d27ad4df703f44e5d0843c5601d421b84671340edda4121a43beb5cb79163fcf
Contents?: true
Size: 1.99 KB
Versions: 2
Compression:
Stored size: 1.99 KB
Contents
require "httpclient"
require "sitemap_check/page"
require "sitemap_check/logger"
require "nokogiri"
require "colorize"

class SitemapCheck
  # A single sitemap document (either a <urlset> or a <sitemapindex>) fetched
  # over HTTP. The document is downloaded and parsed in the constructor.
  class Sitemap
    # XPath to the <loc> of every page listed in a <urlset> document.
    PAGE_LOCATIONS = "/urlset/url/loc".freeze
    # XPath to the <loc> of every child sitemap listed in a <sitemapindex>.
    SITEMAP_LOCATIONS = "/sitemapindex/sitemap/loc".freeze

    # @param url [String] address of the sitemap to download
    # @param http [#get] HTTP client; also handed to every Page and to every
    #   nested Sitemap built by this object
    # @param logger [#log] receives the "missing" / "warning" lines emitted
    #   while pages are being checked
    def initialize(url, http = HTTPClient.new, logger = Logger.new)
      self.logger = logger
      self.url = url
      self.checked = 0
      self.http = http
      setup_doc
    end

    # url: the address this sitemap was created with.
    # checked: number of pages checked so far (0 until #missing_pages has run).
    attr_reader :url, :checked

    # Expands a sitemap index recursively.
    #
    # @return [Array<Sitemap>] every sitemap reachable from this one, followed
    #   by this sitemap itself, de-duplicated by URL
    def sitemaps
      nested = maps.flat_map do |sitemap_url|
        # Pass the logger along so nested sitemaps report to the same place
        # as their parent instead of falling back to a default logger.
        child = Sitemap.new(sitemap_url, http, logger)
        [child] + child.sitemaps
      end
      (nested + [self]).uniq(&:url)
    end

    # Checks every page of this sitemap (once; the result is memoized).
    #
    # @return [Array<Page>] the pages for which Page#exists? is falsy
    def missing_pages
      @_missing ||= find_missing_pages
    end

    # @return [Array<Page>] the pages whose #error is set. The same Page
    #   objects are used by #missing_pages, so this reflects whatever state
    #   those checks left behind.
    #   NOTE(review): presumably Page#error is only populated once
    #   Page#exists? has run - confirm against Page.
    def errored_pages
      pages.select(&:error)
    end

    # @return [Boolean] whether the sitemap itself was fetched successfully
    def exists? # rubocop:disable Style/TrivialAccessors
      @ok
    end

    protected

    attr_accessor :http, :doc, :logger
    attr_writer :url, :checked

    private

    # Number of worker threads used to check pages, read from the CONCURRENCY
    # environment variable (default 10). Clamped to at least one so that a
    # zero or non-numeric value cannot leave the queue without any worker.
    def concurency
      [ENV.fetch("CONCURRENCY", "10").to_i, 1].max
    end

    # Checks all pages using a pool of threads that drain a shared queue.
    #
    # @return [Array<Page>] the pages that do not exist
    def find_missing_pages
      queue = Queue.new
      pages.each { |page| queue.push(page) }

      workers = Array.new(concurency) { Thread.new { check_queued_pages(queue) } }
      workers.each(&:join)

      self.checked = pages.count
      pages.reject(&:exists?)
    end

    # Worker loop: checks pages taken from the queue until it is empty.
    def check_queued_pages(queue)
      while (page = queue.pop(true))
        logger.log " missing: #{page.url}".red unless page.exists?
        logger.log " warning: error connecting to #{page.url}".magenta if page.error
      end
    rescue ThreadError # rubocop:disable Lint/HandleExceptions
      # A non-blocking Queue#pop raises ThreadError once the queue is empty,
      # which is how a worker learns there is nothing left to do.
    end

    # Downloads and parses the sitemap. Sets @ok from the response status and
    # leaves doc unset when the response is not OK.
    def setup_doc
      response = http.get(url, follow_redirect: true)
      return unless (@ok = response.ok?)

      self.doc = Nokogiri::Slop(response.body)
      # Sitemaps declare a default XML namespace; dropping it lets the plain
      # element names in the XPath constants match.
      doc.remove_namespaces!
    rescue HTTPClient::BadResponseError
      @ok = false
    end

    # The pages listed in this sitemap. Memoized so that the objects checked
    # by the worker threads are the same ones later inspected for
    # missing/errored state.
    #
    # @return [Array<Page>] empty when the document could not be loaded or is
    #   not a <urlset>
    def pages
      @_pages ||= locations(PAGE_LOCATIONS).map { |location| Page.new(location, http) }
    end

    # @return [Array<String>] URLs of the child sitemaps listed in a
    #   <sitemapindex>; empty for any other kind of document
    def maps
      locations(SITEMAP_LOCATIONS)
    end

    # Text of every node matching +xpath+. An explicit XPath query is used
    # rather than Slop-style navigation because Slop returns a bare node
    # instead of a node set when exactly one element matches.
    #
    # @param xpath [String]
    # @return [Array<String>] empty when no document was loaded
    def locations(xpath)
      return [] unless doc

      doc.xpath(xpath).map { |node| node.text.strip }
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
sitemap_check-0.1.4 | lib/sitemap_check/sitemap.rb |
sitemap_check-0.1.3 | lib/sitemap_check/sitemap.rb |