lib/anemone/page.rb in anemone-0.1.2 vs lib/anemone/page.rb in anemone-0.2.0

- old
+ new

@@ -20,43 +20,53 @@ attr_accessor :code # Array of redirect-aliases for the page attr_accessor :aliases # Boolean indicating whether or not this page has been visited in PageHash#shortest_paths! attr_accessor :visited - # Used by PageHash#shortest_paths! to store depth of the page + # Depth of this page from the root of the crawl. This is not necessarily the + # shortest path; use PageHash#shortest_paths! to find that value. attr_accessor :depth + # URL of the page that brought us to this page + attr_accessor :referer # # Create a new Page from the response of an HTTP request to *url* # - def self.fetch(url) + def self.fetch(url, from_page = nil) begin - url = URI(url) if url.is_a?(String) + url = URI(url) unless url.is_a?(URI) - response, code, location = Anemone::HTTP.get(url) + if from_page + referer = from_page.url + depth = from_page.depth + 1 + end + response, code, location = Anemone::HTTP.get(url, referer) + aka = nil if !url.eql?(location) aka = location end - return Page.new(url, response.body, code, response.to_hash, aka) + return Page.new(url, response.body, code, response.to_hash, aka, referer, depth) rescue return Page.new(url) end end # # Create a new page # - def initialize(url, body = nil, code = nil, headers = nil, aka = nil) + def initialize(url, body = nil, code = nil, headers = nil, aka = nil, referer = nil, depth = 0) @url = url @code = code @headers = headers @links = [] @aliases = [] @data = OpenStruct.new - + @referer = referer + @depth = depth || 0 + @aliases << aka if !aka.nil? if body begin @doc = Nokogiri::HTML(body)