lib/anemone/http.rb in spk-anemone-0.2.4 vs lib/anemone/http.rb in spk-anemone-0.3.0
- old
+ new
@@ -10,58 +10,69 @@
@connections = {}
@opts = opts
end
#
- # Create a new Page from the response of an HTTP request to *url*
+ # Fetch a single Page from the response of an HTTP request to *url*.
+ # Just gets the final destination page.
#
- def fetch_page(url, from_page = nil)
+ def fetch_page(url, referer = nil, depth = nil)
+ fetch_pages(url, referer, depth).last
+ end
+
+ #
+ # Create new Pages from the response of an HTTP request to *url*,
+ # including redirects
+ #
+ def fetch_pages(url, referer = nil, depth = nil)
begin
url = URI(url) unless url.is_a?(URI)
-
- if from_page
- referer = from_page.url
- depth = from_page.depth + 1
+ pages = []
+ get(url, referer) do |response, code, location, redirect_to, response_time|
+ pages << Page.new(location, :body => response.body.dup,
+ :code => code,
+ :headers => response.to_hash,
+ :referer => referer,
+ :depth => depth,
+ :redirect_to => redirect_to,
+ :response_time => response_time)
end
- response, code, location, response_time = get(url, referer)
-
- aka = nil
- if !url.eql?(location)
- aka = location
- end
-
- return Page.new(url, response.body.dup, code, response.to_hash, aka, referer, depth, response_time)
+ return pages
rescue => e
if verbose?
puts e.inspect
puts e.backtrace
end
- return Page.new(url)
+ return [Page.new(url, :error => e)]
end
end
private
#
- # Retrieve an HTTP response for *url*, following redirects.
- # Returns the response object, response code, and final URI location.
+ # Retrieve HTTP responses for *url*, including redirects.
+ # Yields the response object, response code, and URI location
+ # for each response.
#
def get(url, referer = nil)
response, response_time = get_response(url, referer)
code = Integer(response.code)
loc = url
+ redirect_to = response.is_a?(Net::HTTPRedirection) ? URI(response['location']) : nil
+ yield response, code, loc, redirect_to, response_time
limit = redirect_limit
while response.is_a?(Net::HTTPRedirection) and limit > 0
- loc = URI(response['location'])
+ loc = redirect_to
loc = url.merge(loc) if loc.relative?
response, response_time = get_response(loc, referer)
+ code = Integer(response.code)
+ redirect_to = response.is_a?(Net::HTTPRedirection) ? URI(response['location']) : nil
+ yield response, code, loc, redirect_to, response_time
limit -= 1
end
-
- return response, code, loc, response_time
end
#
# Get an HTTPResponse for *url*, sending the appropriate User-Agent string
#
@@ -92,10 +103,10 @@
if conn = @connections[url.host][url.port]
return conn
end
- refresh_connection(url)
+ refresh_connection url
end
def refresh_connection(url)
http = Net::HTTP.new(url.host, url.port)
if url.scheme == 'https'