lib/anemone/page.rb in anemone-0.0.1 vs lib/anemone/page.rb in anemone-0.0.2
- old
+ new
@@ -5,13 +5,17 @@
class Page
# The URL of the page
attr_reader :url
# Array of distinct A tag HREFs from the page
attr_reader :links
- # Integer response code of the page
- attr_reader :code
+ #Body of the HTTP response
# (initialize leaves @body unset when Anemone.options.discard_page_bodies
# is truthy, so this reader returns nil in that case)
+ attr_reader :body
+ #Content-type of the HTTP response
+ attr_reader :content_type
+ # Integer response code of the page
# (writable in the new version: alias_clone assigns code = 200 on its copy)
+ attr_accessor :code
# Array of redirect-aliases for the page
attr_accessor :aliases
# Boolean indicating whether or not this page has been visited in PageHash#shortest_paths!
attr_accessor :visited
# Used by PageHash#shortest_paths! to store depth of the page
@@ -29,31 +33,32 @@
aka = nil
if !url.eql?(location)
aka = location
end
- return Page.new(url, response, code, aka)
+ return Page.new(url, response.body, code, response['Content-Type'], aka)
rescue
return Page.new(url)
end
end
#
# Create a new page
#
- def initialize(url, response = nil, code = nil, aka = nil)
+ def initialize(url, body = nil, code = nil, content_type = nil, aka = nil)
@url = url
- @response = response
+ @body = body unless Anemone.options.discard_page_bodies
@code = code
+ @content_type = content_type
@links = []
@aliases = []
@aliases << aka if !aka.nil?
#get a list of distinct links on the page, in absolute url form
- if @response and @response.body
- Hpricot(@response.body).search('a').each do |a|
+ if body
+ Hpricot(body).search('a').each do |a|
u = a['href']
next if u.nil?
begin
u = URI(u)
@@ -73,11 +78,14 @@
#
# Return a new page with the same *response* and *url*, but
# with a 200 response code
#
# Old (-) form: built a fresh Page for the given *url*, reusing @response,
# forcing the code to 200 and passing this page's @url as the alias.
# New (+) form: copies this page with clone and sets the copy's code to 200.
#
# NOTE(review): the *url* argument is not read by any of the new (+) lines,
# so the copy keeps this page's @url.
# NOTE(review): @aka is not assigned in the visible part of initialize
# (the aka argument is only appended to @aliases there) -- confirm whether
# the add_alias! guard below can ever be true.
# NOTE(review): Object#clone is a shallow copy; unless initialize_copy is
# defined elsewhere, the copy's @aliases is the same Array object as this
# page's, so add_alias! on the copy would also affect the original.
#
def alias_clone(url)
- Page.new(url, @response, 200, @url)
+ p = clone
+ p.add_alias!(@aka) if !@aka.nil?
+ p.code = 200
+ p
end
#
# Add a redirect-alias String *aka* to the list of the page's aliases
#
@@ -97,30 +105,16 @@
def links_and_their_aliases(page_hash)
# Walks @links in order and accumulates one flat Array: each link is
# followed immediately by the aliases of the object found at page_hash[link].
# The accumulator starts empty, so an empty @links yields [].
#
# NOTE(review): if page_hash[link] returns nil for some link, the .aliases
# call raises NoMethodError -- confirm every link is guaranteed an entry.
@links.inject([]) do |results, link|
# concat returns the receiver, so the block hands the same accumulator
# Array back to inject on every iteration.
results.concat([link].concat(page_hash[link].aliases))
end
end
-
- #
- # Returns the response body for the page
- #
- def body
- @response.body
- end
#
- # Returns the +Content-Type+ header for the page
- #
- def content_type
- @response['Content-Type']
- end
-
- #
# Returns +true+ if the page is a HTML document, returns +false+
# otherwise.
#
# The regexp match must start at index 0 of the content type, so a value
# such as "text/html; charset=utf-8" qualifies, while a value that only
# contains "text/html" further into the string does not.
# In the new (+) form the stored @content_type is read directly; when it is
# nil, =~ yields nil and the comparison with 0 gives +false+.
#
def html?
- (content_type =~ /text\/html/) == 0
+ (@content_type =~ /text\/html/) == 0
end
#
# Returns +true+ if the page is a HTTP redirect, returns +false+
# otherwise.
\ No newline at end of file