lib/url_canonicalize/request.rb in url_canonicalize-0.0.3 vs lib/url_canonicalize/request.rb in url_canonicalize-0.0.4
- old
+ new
@@ -23,29 +23,28 @@
end
# Dispatches on the class of the current response: a success, a redirection,
# or anything else (treated as a failure). Exceptions listed in
# NETWORK_EXCEPTIONS are rescued and turned into a failure result as well.
def handle_response
case response
when Net::HTTPSuccess
- look_for_canonical
+ handle_success
when Net::HTTPRedirection
handle_redirection
else
handle_failure
end
rescue *NETWORK_EXCEPTIONS => e
# Pass the exception's own class and message instead of handle_failure's defaults
handle_failure(e.class, e.message)
end
# Diff hunk: look_for_canonical (removed lines) is replaced by handle_success
# (added lines). handle_success stores a canonical URL found in the response's
# Link header in @canonical_url; then, if the current method is :head, it
# switches the method to :get and calls fetch, otherwise it returns
# enhanced_response.
- def look_for_canonical
- # Look in response Link header
- if response['link'] =~ /<(?<url>.+)>\s*;\s*rel="canonical"/i
- URLCanonicalize::Response::CanonicalFound.new($LAST_MATCH_INFO['url'])
- elsif http_method == :head
# NOTE(review): `.+` in the pattern below is greedy, so a Link header holding
# several <...> entries may capture more than the canonical URL; `[^>]+` looks
# safer -- confirm against real multi-link headers.
+ def handle_success
+ @canonical_url = $LAST_MATCH_INFO['url'] if response['link'] =~ /<(?<url>.+)>\s*;\s*rel="canonical"/i
+
+ if http_method == :head
self.http_method = :get
fetch
else
- canonical_url ? URLCanonicalize::Response::CanonicalFound.new(canonical_url, response) : response
+ enhanced_response
end
end
def handle_redirection
case response
@@ -59,10 +58,19 @@
# Builds a URLCanonicalize::Response::Failure describing what went wrong.
#
# @param klass defaults to the class of the current response
# @param message defaults to the current response's message
# @return [URLCanonicalize::Response::Failure]
def handle_failure(klass = response.class, message = response.message)
  failure_type = URLCanonicalize::Response::Failure
  failure_type.new(klass, message)
end
# Builds the value handle_success returns when the request was not a HEAD.
# When canonical_url is truthy, a Success built from (canonical_url, response,
# html) is wrapped in a CanonicalFound; otherwise a Success built from
# (url, response, html) is returned directly.
+ def enhanced_response
+ if canonical_url
+ response_plus = URLCanonicalize::Response::Success.new(canonical_url, response, html)
+ URLCanonicalize::Response::CanonicalFound.new(canonical_url, response_plus)
+ else
+ URLCanonicalize::Response::Success.new(url, response, html)
+ end
+ end
+
# Parses the current response body with Nokogiri and memoizes the result in
# @html, so repeated calls reuse the same parsed document.
def html
  return @html if @html

  @html = Nokogiri::HTML(response.body)
end
def canonical_url_element
@@ -115,9 +123,10 @@
# Sets the HTTP method and clears @request, @response and (added in this diff)
# @html, so nothing derived from the previous method is reused. html is
# memoized in @html; the other two are presumably memoized the same way --
# confirm against their accessors.
def http_method=(value)
@http_method = value
@request = nil
@response = nil
+ @html = nil
end
# Some sites treat HEAD requests as suspicious activity and block the
# requester after a few attempts. For these sites we'll use GET requests
# only