lib/retriever/fetchsitemap.rb in rubyretriever-0.1.4 vs lib/retriever/fetchsitemap.rb in rubyretriever-1.0.0

- old
+ new

@@ -1,39 +1,35 @@
 module Retriever
   class FetchSitemap < Fetch
-    attr_reader :sitemap
     def initialize(url,options)
       super
-      @sitemap = [@t.target]
-      @linkStack = self.parseInternalVisitableLinks(self.fetchLinks(@t.source))
+      @data = [@t.target]
+      page_one = Retriever::Page.new(@t.source,@t)
+      @linkStack = page_one.parseInternalVisitable
       lg("URL Crawled: #{@t.target}")
-      self.lg("#{@linkStack.size-1} new links found")
+      lg("#{@linkStack.size-1} new links found")
       errlog("Bad URL -- #{@t.target}") if !@linkStack
       @linkStack.delete(@t.target) if @linkStack.include?(@t.target)
       @linkStack = @linkStack.take(@maxPages) if (@linkStack.size+1 > @maxPages)
-      @sitemap.concat(@linkStack)
+      @data.concat(@linkStack)
       self.async_crawl_and_collect()
-      @sitemap.sort_by! {|x| x.length} if @sitemap.size>1
-      @sitemap.uniq!
-
-      self.dump(self.sitemap)
-      self.write(self.sitemap) if /CSV/i =~ @s
-      self.gen_xml(self.sitemap) if /XML/i =~ @s
+      @data.sort_by! {|x| x.length} if @data.size>1
+      @data.uniq!
     end
-    def gen_xml(data)
+    def gen_xml
       f = File.open("sitemap-#{@t.host.split('.')[1]}.xml", 'w+')
       f << "<?xml version='1.0' encoding='UTF-8'?><urlset xmlns='http://www.sitemaps.org/schemas/sitemap/0.9'>"
-      data.each do |url|
+      @data.each do |url|
         f << "<url><loc>#{url}</loc></url>"
       end
       f << "</urlset>"
       f.close
       puts "###############################"
       puts "File Created: sitemap-#{@t.host.split('.')[1]}.xml"
-      puts "Object Count: #{@sitemap.size}"
+      puts "Object Count: #{@data.size}"
       puts "###############################"
       puts
     end
   end
 end
\ No newline at end of file