lib/anemone/page.rb in anemone-0.1.2 vs lib/anemone/page.rb in anemone-0.2.0
- old
+ new
@@ -20,43 +20,53 @@
attr_accessor :code
# Array of redirect-aliases for the page
attr_accessor :aliases
# Boolean indicating whether or not this page has been visited in PageHash#shortest_paths!
attr_accessor :visited
- # Used by PageHash#shortest_paths! to store depth of the page
+ # Depth of this page from the root of the crawl. This is not necessarily the
+ # shortest path; use PageHash#shortest_paths! to find that value.
attr_accessor :depth
+ # URL of the page that brought us to this page
+ attr_accessor :referer
#
# Create a new Page from the response of an HTTP request to *url*
#
- def self.fetch(url)
+ def self.fetch(url, from_page = nil)
begin
- url = URI(url) if url.is_a?(String)
+ url = URI(url) unless url.is_a?(URI)
- response, code, location = Anemone::HTTP.get(url)
+ if from_page
+ referer = from_page.url
+ depth = from_page.depth + 1
+ end
+ response, code, location = Anemone::HTTP.get(url, referer)
+
aka = nil
if !url.eql?(location)
aka = location
end
- return Page.new(url, response.body, code, response.to_hash, aka)
+ return Page.new(url, response.body, code, response.to_hash, aka, referer, depth)
rescue
return Page.new(url)
end
end
#
# Create a new page
#
- def initialize(url, body = nil, code = nil, headers = nil, aka = nil)
+ def initialize(url, body = nil, code = nil, headers = nil, aka = nil, referer = nil, depth = 0)
@url = url
@code = code
@headers = headers
@links = []
@aliases = []
@data = OpenStruct.new
-
+ @referer = referer
+ @depth = depth || 0
+
@aliases << aka if !aka.nil?
if body
begin
@doc = Nokogiri::HTML(body)