lib/anemone/http.rb in spk-anemone-0.2.4 vs lib/anemone/http.rb in spk-anemone-0.3.0

- old
+ new

@@ -10,58 +10,69 @@ @connections = {} @opts = opts end # - # Create a new Page from the response of an HTTP request to *url* + # Fetch a single Page from the response of an HTTP request to *url*. + # Just gets the final destination page. # - def fetch_page(url, from_page = nil) + def fetch_page(url, referer = nil, depth = nil) + fetch_pages(url, referer, depth).last + end + + # + # Create new Pages from the response of an HTTP request to *url*, + # including redirects + # + def fetch_pages(url, referer = nil, depth = nil) begin url = URI(url) unless url.is_a?(URI) - - if from_page - referer = from_page.url - depth = from_page.depth + 1 + pages = [] + get(url, referer) do |response, code, location, redirect_to, response_time| + pages << Page.new(location, :body => response.body.dup, + :code => code, + :headers => response.to_hash, + :referer => referer, + :depth => depth, + :redirect_to => redirect_to, + :response_time => response_time) end - response, code, location, response_time = get(url, referer) - - aka = nil - if !url.eql?(location) - aka = location - end - - return Page.new(url, response.body.dup, code, response.to_hash, aka, referer, depth, response_time) + return pages rescue => e if verbose? puts e.inspect puts e.backtrace end - return Page.new(url) + return [Page.new(url, :error => e)] end end private # - # Retrieve an HTTP response for *url*, following redirects. - # Returns the response object, response code, and final URI location. + # Retrieve HTTP responses for *url*, including redirects. + # Yields the response object, response code, and URI location + # for each response. # def get(url, referer = nil) response, response_time = get_response(url, referer) code = Integer(response.code) loc = url + redirect_to = response.is_a?(Net::HTTPRedirection) ? URI(response['location']) : nil + yield response, code, loc, redirect_to, response_time limit = redirect_limit while response.is_a?(Net::HTTPRedirection) and limit > 0 - loc = URI(response['location']) + loc = redirect_to loc = url.merge(loc) if loc.relative? response, response_time = get_response(loc, referer) + code = Integer(response.code) + redirect_to = response.is_a?(Net::HTTPRedirection) ? URI(response['location']) : nil + yield response, code, loc, redirect_to, response_time limit -= 1 end - - return response, code, loc, response_time end # # Get an HTTPResponse for *url*, sending the appropriate User-Agent string # @@ -92,10 +103,10 @@ if conn = @connections[url.host][url.port] return conn end - refresh_connection(url) + refresh_connection url end def refresh_connection(url) http = Net::HTTP.new(url.host, url.port) if url.scheme == 'https'