Sha256: 52acefa7d716c774c9ed6ca41cc8c4c408a65b9b7a8f1dffd9274f09fa695228
Contents?: true
Size: 1.26 KB
Versions: 3
Compression:
Stored size: 1.26 KB
Contents
module Retriever
  # Collects the pages reachable from a target URL and can write them out as
  # an XML sitemap.
  #
  # NOTE(review): @t, @maxPages, lg, errlog and async_crawl_and_collect are
  # not defined in this class; they are presumably provided by Fetch -- confirm.
  class FetchSitemap < Fetch
    # Receives the target URL and RR options and builds @data, an array of
    # the unique page URLs found, starting with the target itself.
    #
    # url     - passed through unchanged to Fetch#initialize.
    # options - passed through unchanged to Fetch#initialize.
    def initialize(url, options)
      super
      @data = [@t.target]
      page_one = Retriever::Page.new(@t.source, @t)
      @linkStack = page_one.parseInternalVisitable
      lg("URL Crawled: #{@t.target}")

      # Validate the first page's links *before* touching them; previously the
      # link count was logged first, so a nil result raised NoMethodError and
      # the "Bad URL" report was never reached.
      unless @linkStack
        errlog("Bad URL -- #{@t.target}")
        # errlog is defined elsewhere; if it returns instead of raising, stop
        # here rather than dereferencing nil below.
        return
      end
      lg("#{@linkStack.size - 1} new links found")

      # The target is already the first entry of @data.
      @linkStack.delete(@t.target)
      # NOTE(review): this caps the stack at @maxPages links, so @data can
      # hold @maxPages + 1 entries once the target is counted -- confirm that
      # is the intended limit.
      @linkStack = @linkStack.take(@maxPages) if @linkStack.size + 1 > @maxPages
      @data.concat(@linkStack)

      async_crawl_and_collect

      # Shortest URLs first, then drop duplicates.
      @data.sort_by!(&:length) if @data.size > 1
      @data.uniq!
    end

    # Writes the collected URLs to "sitemap-<second host label>.xml" in the
    # current directory as an XML sitemap, then prints a short summary.
    def gen_xml
      filename = "sitemap-#{@t.host.split('.')[1]}.xml"

      # Block form guarantees the handle is closed even if a write raises.
      File.open(filename, 'w+') do |f|
        f << "<?xml version='1.0' encoding='UTF-8'?><urlset xmlns='http://www.sitemaps.org/schemas/sitemap/0.9'>"
        @data.each do |url|
          # Entity-escape &, < and > so URLs with query strings keep the
          # document well-formed.
          f << "<url><loc>#{url.to_s.encode(xml: :text)}</loc></url>"
        end
        f << "</urlset>"
      end

      puts "###############################"
      puts "File Created: #{filename}"
      puts "Object Count: #{@data.size}"
      puts "###############################"
      puts
    end
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
rubyretriever-1.0.3 | lib/retriever/fetchsitemap.rb |
rubyretriever-1.0.2 | lib/retriever/fetchsitemap.rb |
rubyretriever-1.0.1 | lib/retriever/fetchsitemap.rb |