Sha256: 390aacd9a06b77745e3dca9b0baebae2cc7fc2c68cbab363785915af21dc4a47
Contents?: true
Size: 1.84 KB
Versions: 1
Compression:
Stored size: 1.84 KB
Contents
require 'httpclient'
require 'sitemap_check/page'
require 'nokogiri'
require 'colorize'

class SitemapCheck
  # One sitemap document, fetched and parsed when the object is built.
  #
  # The document may be a sitemap index (it lists further sitemaps) or a
  # URL set (it lists pages). Both kinds are handled by the same class.
  class Sitemap
    # Fetches +url+ with +http+ and parses the response body.
    #
    # @param url [String] location of the sitemap document
    # @param http [HTTPClient] client used for this sitemap, for every nested
    #   sitemap and for every Page built from it
    def initialize(url, http = HTTPClient.new)
      self.url = url
      self.checked = 0
      self.http = http
      setup_doc
    end

    # url     - the location this sitemap was loaded from
    # checked - number of pages processed so far by #missing_pages
    attr_reader :url, :checked

    # Recursively expands every sitemap referenced by this one.
    #
    # @return [Array<Sitemap>] all nested sitemaps followed by this sitemap
    #   itself, de-duplicated by URL
    def sitemaps
      nested = maps.flat_map do |location|
        child = Sitemap.new(location, http)
        [child] + child.sitemaps
      end
      (nested + [self]).uniq(&:url)
    end

    # Pages listed in this sitemap for which Page#exists? is falsy.
    # The check runs once; later calls return the memoized result.
    #
    # @return [Array<Page>] missing pages, in document order
    def missing_pages
      @missing_pages ||= find_missing_pages
    end

    # @return [Boolean] whether the sitemap itself was fetched successfully
    def exists? # rubocop:disable Style/TrivialAccessors
      @ok
    end

    protected

    attr_accessor :http, :doc
    attr_writer :url, :checked

    private

    # Number of worker threads used to check pages, read from the
    # CONCURENCY environment variable (default 10).
    #
    # Environment values are Strings, so the value is converted explicitly;
    # it is also clamped to at least one worker so pages are always checked.
    #
    # @return [Integer]
    # @raise [ArgumentError] when CONCURENCY is set to a non-integer value
    def concurency
      [Integer(ENV.fetch('CONCURENCY', 10)), 1].max
    end

    # Checks every page of this sitemap using a pool of worker threads.
    #
    # The page list is built once: #pages creates new Page objects on each
    # call, so asking for it a second time would discard the results the
    # workers just produced.
    #
    # @return [Array<Page>] missing pages, in document order
    def find_missing_pages
      all_pages = pages
      queue = Queue.new
      all_pages.each { |page| queue.push(page) }
      mutex = Mutex.new
      # Keyed by object identity so the result does not depend on how Page
      # defines equality.
      missing = {}.compare_by_identity

      workers = Array.new(concurency) do
        Thread.new { check_queued_pages(queue, mutex, missing) }
      end
      workers.each(&:join)

      all_pages.select { |page| missing.key?(page) }
    end

    # Worker loop: drains +queue+, reporting and recording missing pages.
    def check_queued_pages(queue, mutex, missing)
      loop do
        page = next_page(queue)
        break unless page

        found = page.exists?
        puts " missing: #{page.url}".red unless found
        # Shared state is only touched while holding the mutex.
        mutex.synchronize do
          self.checked += 1
          missing[page] = true unless found
        end
      end
    end

    # Non-blocking pop; an empty queue raises ThreadError, which means the
    # work is finished. Only the pop is guarded so that errors raised while
    # checking a page are not swallowed.
    #
    # @return [Page, nil] the next page, or nil when the queue is empty
    def next_page(queue)
      queue.pop(true)
    rescue ThreadError
      nil
    end

    # Downloads the sitemap and, when the response is ok, parses it with
    # namespaces removed so that plain XPath expressions match.
    def setup_doc
      response = http.get(url, follow_redirect: true)
      @ok = response.ok?
      return unless @ok

      self.doc = Nokogiri::XML(response.body)
      doc.remove_namespaces!
    rescue HTTPClient::BadResponseError
      @ok = false
    end

    # @return [Array<Page>] a new Page for every <loc> of a URL set
    def pages
      locations('/urlset/url/loc').map { |location| Page.new(location, http) }
    end

    # @return [Array<String>] locations of the sitemaps listed by an index
    def maps
      locations('/sitemapindex/sitemap/loc')
    end

    # Text of every node matching +path+; empty when nothing was parsed.
    #
    # Explicit XPath is used instead of Slop-style lookups because those
    # return a bare element instead of a list when exactly one node matches,
    # which made single-entry sitemaps appear empty.
    #
    # @param path [String] XPath expression evaluated against the document
    # @return [Array<String>]
    def locations(path)
      return [] unless doc

      doc.xpath(path).map(&:text)
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
sitemap_check-0.1.0 | lib/sitemap_check/sitemap.rb |