lib/retriever/fetchseo.rb in rubyretriever-1.0.3 vs lib/retriever/fetchseo.rb in rubyretriever-1.1.0

- old
+ new

@@ -1,23 +1,25 @@
 module Retriever
-	class FetchSEO < Fetch
-		def initialize(url,options) #recieves target url and RR options, returns an array of onpage SEO related fields on all unique pages found on the site
-			super
-			@data = []
-			page_one = Retriever::Page.new(@t.source,@t)
-			@linkStack = page_one.parseInternalVisitable
-			lg("URL Crawled: #{@t.target}")
-			lg("#{@linkStack.size-1} new links found")
+  #
+  class FetchSEO < Fetch
+    # recieves target url and RR options
+    # returns an array of onpage SEO related fields
+    # on all unique pages found on the site
+    def initialize(url, options)
+      super
+      @data = []
+      page_one = Retriever::Page.new(@t.source, @t)
+      lg("URL Crawled: #{@t.target}")
 
-			@data.push(page_one.parseSEO)
-			lg("#{@data.size} pages scraped")
-			errlog("Bad URL -- #{@t.target}") if !@linkStack
+      @link_stack = page_one.parse_internal_visitable
+      errlog("Bad URL -- #{@t.target}") unless @link_stack
+      lg("#{@link_stack.size - 1} links found")
+      @link_stack.delete(@t.target)
 
-			@linkStack.delete(@t.target) if @linkStack.include?(@t.target)
-			@linkStack = @linkStack.take(@maxPages) if (@linkStack.size+1 > @maxPages)
+      @data.push(page_one.parse_seo)
 
-			self.async_crawl_and_collect()
+      async_crawl_and_collect
 
-			@data.sort_by! {|x| x[0].length}
-		end
-	end
-end
\ No newline at end of file
+      @data.sort_by! { |x| x[0].length }
+    end
+  end
+end