Sha256: 5dc45f9685c390d42bb1bef0e895f67b1e17269e4b4cee2e9092900ae468f111
Contents?: true
Size: 744 Bytes
Versions: 5
Compression:
Stored size: 744 Bytes
Contents
module Retriever
  # Collects the internal links reachable from a target URL into a sitemap.
  #
  # All of the work happens in the constructor: it fetches the target page,
  # extracts its internal links, hands off to the asynchronous crawler, then
  # sorts, de-duplicates and caps the resulting list before dumping it (and
  # writing it out when an output target is configured).
  #
  # NOTE(review): @target, @maxPages and @output are not assigned in this
  # class; presumably Fetch#initialize sets them from url/options -- confirm.
  class FetchSitemap < Fetch
    # The list of collected URLs, starting with the target itself.
    attr_reader :sitemap

    # @param url [Object] forwarded unchanged to Fetch#initialize
    # @param options [Object] forwarded unchanged to Fetch#initialize
    def initialize(url, options)
      super
      @sitemap = [@target]
      @linkStack = parseInternalLinks(fetchLinks(fetchPage(@target)))

      # Check for a missing link list BEFORE touching it. Previously the
      # "new links found" log line ran first and called #size on nil, so a
      # bad URL surfaced as a NoMethodError instead of the intended message.
      # NOTE(review): errlog is assumed to abort (raise/exit) -- confirm.
      errlog("Bad URL -- #{@target}") if !@linkStack

      lg("#{@linkStack.size-1} new links found")

      # The target is already the first sitemap entry; don't queue it again.
      @linkStack.delete(@target) if @linkStack.include?(@target)
      @linkStack = @linkStack.take(@maxPages) if @linkStack.size + 1 > @maxPages
      @sitemap.concat(@linkStack)

      async_crawl_and_collect

      # Shortest URLs first, then drop duplicates and enforce the page cap.
      @sitemap.sort_by!(&:length) if @sitemap.size > 1
      @sitemap.uniq!
      @sitemap = @sitemap.take(@maxPages) if @sitemap.size + 1 > @maxPages

      dump(sitemap)
      write(sitemap) if @output
    end
  end
end
Version data entries
5 entries across 5 versions & 1 rubygems