Sha256: 5dc45f9685c390d42bb1bef0e895f67b1e17269e4b4cee2e9092900ae468f111

Contents?: true

Size: 744 Bytes

Versions: 5

Compression:

Stored size: 744 Bytes

Contents

module Retriever
	# Builds a sitemap (a list of URLs) starting from a single target URL.
	#
	# All of the work happens in the constructor: the target page is fetched,
	# its internal links are collected, further pages are crawled via
	# +async_crawl_and_collect+, and the resulting list is sorted, de-duplicated,
	# capped at +@maxPages+ entries, dumped, and optionally written out.
	#
	# NOTE(review): +@target+, +@maxPages+ and +@output+ are not assigned in
	# this class; presumably Fetch#initialize sets them from +url+/+options+ --
	# confirm against Fetch.
	class FetchSitemap < Fetch
		# The collected URLs; the target URL is the first entry before sorting.
		attr_reader :sitemap

		# url     - passed through unchanged to Fetch#initialize.
		# options - passed through unchanged to Fetch#initialize.
		def initialize(url, options)
			super
			@sitemap = [@target]
			@linkStack = parseInternalLinks(fetchLinks(fetchPage(@target)))

			# Report an unusable link list *before* touching it. Previously this
			# check ran after @linkStack.size had already been called, so a nil
			# link list surfaced as a NoMethodError instead of this message.
			# NOTE(review): errlog is assumed to abort (raise/exit) -- confirm.
			errlog("Bad URL -- #{@target}") unless @linkStack
			lg("#{@linkStack.size-1} new links found")

			# The target is already the first sitemap entry; Array#delete is a
			# no-op when the element is absent, so no include? check is needed.
			@linkStack.delete(@target)
			@linkStack = @linkStack.take(@maxPages) if @linkStack.size >= @maxPages
			@sitemap.concat(@linkStack)

			async_crawl_and_collect

			# Shortest URLs first, then drop duplicates and enforce the page cap.
			@sitemap.sort_by!(&:length)
			@sitemap.uniq!
			@sitemap = @sitemap.take(@maxPages) if @sitemap.size >= @maxPages

			dump(sitemap)
			write(sitemap) if @output
		end
	end
end

Version data entries

5 entries across 5 versions & 1 rubygem

Version Path
rubyretriever-0.0.13 lib/retriever/fetchsitemap.rb
rubyretriever-0.0.12 lib/retriever/fetchsitemap.rb
rubyretriever-0.0.11 lib/retriever/fetchsitemap.rb
rubyretriever-0.0.10 lib/retriever/fetchsitemap.rb
rubyretriever-0.0.9 lib/fetchsitemap.rb