Sha256: d8bad2d57f9956ae086f0df91bc7151180d3023f404c6d6e6934b227e6f83920
Contents?: true
Size: 1.9 KB
Versions: 1
Compression:
Stored size: 1.9 KB
Contents
require 'httpclient'
require 'sitemap_check/page'
require 'sitemap_check/logger'
require 'nokogiri'
require 'colorize'

class SitemapCheck
  # A single sitemap document fetched over HTTP.
  #
  # The document is downloaded and parsed once, at construction time. It may
  # be a sitemap index (listing further sitemaps), a URL set (listing pages),
  # or neither; the accessors below simply return empty collections for the
  # parts that are absent.
  class Sitemap
    # @param url    [String] location of the sitemap document
    # @param http   [#get] HTTP client used to fetch this document; it is also
    #   handed to every Page and nested Sitemap created from it
    # @param logger [#log] receives one line per missing page
    def initialize(url, http = HTTPClient.new, logger = Logger.new)
      self.logger = logger
      self.url = url
      self.checked = 0
      self.http = http
      setup_doc
    end

    attr_reader :url, :checked

    # Recursively expands every nested sitemap referenced by this document.
    #
    # @return [Array<Sitemap>] nested sitemaps (each parent listed before its
    #   own children) followed by this sitemap, de-duplicated by URL
    def sitemaps
      nested = maps.flat_map do |location|
        # Pass the logger along so nested sitemaps report through the same
        # logger as their parent instead of building a default one.
        child = Sitemap.new(location, http, logger)
        [child] + child.sitemaps
      end
      (nested + [self]).uniq(&:url)
    end

    # @return [Array<Page>] pages listed in this sitemap whose `exists?` was
    #   falsy; computed on first call and memoized afterwards
    def missing_pages
      @_missing ||= find_missing_pages
    end

    # @return [Boolean, nil] whether the sitemap document itself was fetched
    #   successfully
    def exists? # rubocop:disable Style/TrivialAccessors
      @ok
    end

    protected

    attr_accessor :http, :doc, :logger
    attr_writer :url, :checked

    private

    # Number of worker threads, read from the CONCURRENCY environment
    # variable (default 10). Clamped to at least one so that a zero or
    # non-numeric setting cannot leave the pages unchecked.
    def concurrency
      [ENV.fetch('CONCURRENCY', '10').to_i, 1].max
    end

    # Checks every page of this sitemap using a pool of worker threads and
    # records how many pages were checked.
    #
    # The page list is built exactly once: `pages` creates new Page objects on
    # every call, so calling it again after the workers finish would discard
    # their results and check everything a second time.
    #
    # @return [Array<Page>] missing pages, in sitemap order
    def find_missing_pages # rubocop:disable Metrics/AbcSize
      all_pages = pages
      pending = Queue.new
      all_pages.each_with_index { |page, index| pending.push([index, page]) }
      missing = Queue.new

      workers = Array.new(concurrency) do
        Thread.new do
          begin
            loop do
              # Non-blocking pop raises ThreadError once the queue is empty,
              # which is what terminates the worker.
              index, page = pending.pop(true)
              next if page.exists?
              logger.log " missing: #{page.url}".red
              missing.push([index, page])
            end
          rescue ThreadError # rubocop:disable Lint/HandleExceptions
          end
        end
      end
      workers.each(&:join)

      self.checked = all_pages.count
      # Workers finish in arbitrary order; restore the sitemap's own ordering.
      Array.new(missing.size) { missing.pop }.sort_by(&:first).map(&:last)
    end

    # Downloads and parses the sitemap document, recording in @ok whether the
    # request succeeded. `doc` stays nil when it did not.
    def setup_doc
      response = http.get(url, follow_redirect: true)
      @ok = response.ok?
      return unless @ok
      self.doc = Nokogiri::Slop(response.body)
      doc.remove_namespaces!
    rescue HTTPClient::BadResponseError
      @ok = false
    end

    # @return [Array<Page>] one Page per <url><loc> entry of a URL set
    def pages
      locations('/urlset/url/loc').map { |location| Page.new(location, http) }
    end

    # @return [Array<String>] locations of the nested sitemaps listed by a
    #   sitemap index
    def maps
      locations('/sitemapindex/sitemap/loc')
    end

    # Text of every node matching +path+, or an empty list when no document
    # was loaded.
    #
    # Explicit XPath is used instead of Slop's dynamic accessors because those
    # return a bare element rather than a node set when exactly one child
    # matches, which made single-entry sitemaps look empty.
    def locations(path)
      return [] unless doc
      doc.xpath(path).map(&:text)
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
sitemap_check-0.1.1 | lib/sitemap_check/sitemap.rb |