Sha256: ae33968193760e49c9b06b19dedd38de71fcf0000727c610219ab7b3678939cf

Contents?: true

Size: 752 Bytes

Versions: 2

Compression:

Stored size: 752 Bytes

Contents

module Retriever
	# Builds a sitemap for a target URL. All of the work happens in the
	# constructor: the sitemap is seeded with the target, extended with the
	# internal links found on the target page, handed to the crawler, and
	# finally sorted, de-duplicated, capped and emitted.
	#
	# The result is available afterwards through +sitemap+.
	class FetchSitemap < Fetch
		# Array of collected URLs (seeded with the target URL).
		attr_reader :sitemap

		# url and options are passed unchanged to Fetch#initialize via the
		# bare +super+.
		# NOTE(review): @target, @maxPages and @output are read below but never
		# assigned here -- presumably the superclass sets them; confirm.
		def initialize(url,options)
			super
			@sitemap = [@target]
			@linkStack = parseInternalLinks(fetchLinks(fetchPage(@target)))

			# Validate before the first use of @linkStack: calling +size+ on a
			# nil/false result would raise NoMethodError and hide the intended
			# "Bad URL" report.
			# NOTE(review): errlog is defined outside this file; presumably it
			# aborts -- confirm.
			errlog("Bad URL -- #{@target}") unless @linkStack
			lg("#{@linkStack.size-1} new links found")

			# The target is already in @sitemap, so drop it from the pending
			# links (Array#delete is a no-op when the item is absent).
			@linkStack.delete(@target)
			@linkStack = @linkStack.take(@maxPages) if (@linkStack.size+1 > @maxPages)
			@sitemap.concat(@linkStack)

			# Crawl step, implemented outside this file.
			async_crawl_and_collect

			# Shortest URLs first, then remove duplicates and enforce the cap.
			@sitemap.sort_by! { |link| link.length } if @sitemap.size>1
			@sitemap.uniq!
			@sitemap = @sitemap.take(@maxPages) if (@sitemap.size+1 > @maxPages)

			# Emit the result; additionally write it out when @output is set.
			dump(sitemap)
			write(@output,sitemap) if @output
		end
	end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
rubyretriever-0.0.9 lib/retriever/fetchsitemap.rb
rubyretriever-0.0.8 lib/retriever/fetchsitemap.rb