lib/retriever/fetchsitemap.rb in rubyretriever-0.1.4 vs lib/retriever/fetchsitemap.rb in rubyretriever-1.0.0
- old
+ new
@@ -1,39 +1,35 @@
module Retriever
class FetchSitemap < Fetch
- attr_reader :sitemap
def initialize(url,options)
super
- @sitemap = [@t.target]
- @linkStack = self.parseInternalVisitableLinks(self.fetchLinks(@t.source))
+ @data = [@t.target]
+ page_one = Retriever::Page.new(@t.source,@t)
+ @linkStack = page_one.parseInternalVisitable
lg("URL Crawled: #{@t.target}")
- self.lg("#{@linkStack.size-1} new links found")
+ lg("#{@linkStack.size-1} new links found")
errlog("Bad URL -- #{@t.target}") if !@linkStack
@linkStack.delete(@t.target) if @linkStack.include?(@t.target)
@linkStack = @linkStack.take(@maxPages) if (@linkStack.size+1 > @maxPages)
- @sitemap.concat(@linkStack)
+ @data.concat(@linkStack)
self.async_crawl_and_collect()
- @sitemap.sort_by! {|x| x.length} if @sitemap.size>1
- @sitemap.uniq!
-
- self.dump(self.sitemap)
- self.write(self.sitemap) if /CSV/i =~ @s
- self.gen_xml(self.sitemap) if /XML/i =~ @s
+ @data.sort_by! {|x| x.length} if @data.size>1
+ @data.uniq!
end
- def gen_xml(data)
+ def gen_xml
f = File.open("sitemap-#{@t.host.split('.')[1]}.xml", 'w+')
f << "<?xml version='1.0' encoding='UTF-8'?><urlset xmlns='http://www.sitemaps.org/schemas/sitemap/0.9'>"
- data.each do |url|
+ @data.each do |url|
f << "<url><loc>#{url}</loc></url>"
end
f << "</urlset>"
f.close
puts "###############################"
puts "File Created: sitemap-#{@t.host.split('.')[1]}.xml"
- puts "Object Count: #{@sitemap.size}"
+ puts "Object Count: #{@data.size}"
puts "###############################"
puts
end
end
end
\ No newline at end of file