lib/anemone/page.rb in anemone-0.3.0 vs lib/anemone/page.rb in anemone-0.3.1
- old
+ new
@@ -4,10 +4,12 @@
module Anemone
class Page
# The URL of the page
attr_reader :url
+ # The raw HTTP response body of the page
+ attr_reader :body
# Headers of the HTTP response
attr_reader :headers
# URL of the page this one redirected to, if any
attr_reader :redirect_to
# Exception object, if one was raised during HTTP#fetch_page
@@ -46,11 +48,13 @@
@error = params[:error]
@fetched = !params[:code].nil?
end
+ #
# Array of distinct A tag HREFs from the page
+ #
def links
return @links unless @links.nil?
@links = []
return @links if !doc
@@ -62,21 +66,29 @@
end
@links.uniq!
@links
end
+ #
# Nokogiri document for the HTML body
+ #
def doc
  # Reuse the document if it has already been parsed.
  return @doc if @doc

  # Parse only when a body is present and html? reports an HTML page.
  # When the condition is false, @doc is left untouched and nil is returned.
  @doc = Nokogiri::HTML(@body) if @body && html?
rescue StandardError
  # Best-effort: any error raised by the html? check or by the parser
  # yields nil instead of propagating to the caller.
  nil
end
+ #
# Delete the Nokogiri document and response body to conserve memory
+ #
def discard_doc!
  # Call links first: it memoizes its result in @links and needs the
  # document to compute it, so it must run before the document is dropped.
  links

  # Release the raw body and the parsed document; returns nil.
  @body = nil
  @doc = nil
end
+ #
+ # Was the page successfully fetched?
+ # +true+ if the page was fetched with no error, +false+ otherwise.
+ #
def fetched?
  # Plain reader for the @fetched flag; the stored value is returned as-is.
  @fetched
end
#