lib/url_canonicalize/request.rb in url_canonicalize-0.0.3 vs lib/url_canonicalize/request.rb in url_canonicalize-0.0.4

- old
+ new

@@ -23,29 +23,28 @@ end def handle_response case response when Net::HTTPSuccess - look_for_canonical + handle_success when Net::HTTPRedirection handle_redirection else handle_failure end rescue *NETWORK_EXCEPTIONS => e handle_failure(e.class, e.message) end - def look_for_canonical - # Look in response Link header - if response['link'] =~ /<(?<url>.+)>\s*;\s*rel="canonical"/i - URLCanonicalize::Response::CanonicalFound.new($LAST_MATCH_INFO['url']) - elsif http_method == :head + def handle_success + @canonical_url = $LAST_MATCH_INFO['url'] if response['link'] =~ /<(?<url>.+)>\s*;\s*rel="canonical"/i + + if http_method == :head self.http_method = :get fetch else - canonical_url ? URLCanonicalize::Response::CanonicalFound.new(canonical_url, response) : response + enhanced_response end end def handle_redirection case response @@ -59,10 +58,19 @@ def handle_failure(klass = response.class, message = response.message) URLCanonicalize::Response::Failure.new(klass, message) end + def enhanced_response + if canonical_url + response_plus = URLCanonicalize::Response::Success.new(canonical_url, response, html) + URLCanonicalize::Response::CanonicalFound.new(canonical_url, response_plus) + else + URLCanonicalize::Response::Success.new(url, response, html) + end + end + def html @html ||= Nokogiri::HTML response.body end def canonical_url_element @@ -115,9 +123,10 @@ def http_method=(value) @http_method = value @request = nil @response = nil + @html = nil end # Some sites treat HEAD requests as suspicious activity and block the # requester after a few attempts. For these sites we'll use GET requests # only