lib/unwind.rb in unwind-0.2.1 vs lib/unwind.rb in unwind-0.9.0
- old
+ new
@@ -1,188 +1,53 @@
require "unwind/version"
require 'net/http'
-require 'addressable/uri'
module Unwind
- class TooManyRedirects < StandardError; end
- class MissingRedirectLocation < StandardError; end
+ class TooManyRedirects < StandardError; end
- class RedirectFollower
+ class RedirectFollower
- attr_reader :final_url, :original_url, :redirect_limit, :response, :redirects
+ attr_reader :final_url, :original_url, :redirect_limit, :response, :redirects
- def initialize(original_url, limit=5)
- @original_url, @redirect_limit = original_url, limit
- @redirects = []
- end
+ def initialize(original_url, limit=5)
+ @original_url, @redirect_limit = original_url, limit
+ @redirects = []
+ end
- def redirected?
- !(self.final_url == self.original_url)
- end
+ def resolve(current_url=nil)
- def resolve(current_url=nil, options={})
- ok_to_continue?
+ ok_to_continue?
- current_url ||= self.original_url
- #adding this header because we really only care about resolving the url
- headers = (options || {}).merge({"accept-encoding" => "none"})
+ current_url ||= self.original_url
- url = URI.parse(current_url)
+ response = Net::HTTP.get_response(URI.parse(current_url))
- request = Net::HTTP::Get.new(url)
+ if response.kind_of?(Net::HTTPRedirection)
+ @redirects << current_url
+ @redirect_limit -= 1
+ resolve(redirect_url(response))
+ else
+ @final_url = current_url
+ @response = response
+ self
+ end
+ end
- headers.each do |header, value|
- request.add_field(header, value)
- end
+ private
- response = Net::HTTP.start(
- url.host,
- url.port,
- :use_ssl => url.scheme == 'https'
- ) do |http|
- http.request(request)
- end
+ def ok_to_continue?
+ raise TooManyRedirects if redirect_limit < 0
+ end
- if is_response_redirect?(response)
- handle_redirect(redirect_url(response), current_url, response, headers)
- elsif meta_uri = meta_refresh?(current_url, response)
- handle_redirect(meta_uri, current_url, response, headers)
- else
- handle_final_response(current_url, response)
- end
+ def redirect_url(response)
+ if response['location'].nil?
+ response.body.match(/<a href=\"([^>]+)\">/i)[1]
+ else
+ response['location']
+ end
+ end
+
- self
- end
-
- def self.resolve(original_url, limit=5)
- new(original_url, limit).resolve
- end
-
- private
-
- def record_redirect(url)
- @redirects << url.to_s
- @redirect_limit -= 1
- end
-
- def is_response_redirect?(response)
- Net::HTTPRedirection === response
- end
-
- def handle_redirect(uri_to_redirect, url, response, headers)
- record_redirect url
- resolve(uri_to_redirect.normalize, apply_cookie(response, headers))
- end
-
- def handle_final_response(current_url, response)
- current_url = current_url.dup.to_s
- if Net::HTTPSuccess === response && canonical = canonical_link?(response)
- @redirects << current_url
- if Addressable::URI.parse(canonical).relative?
- @final_url = make_url_absolute(current_url, Addressable::URI.parse(canonical)).to_s
- else
- @final_url = canonical
- end
-
- else
- @final_url = current_url
- end
- @response = response
- end
-
- def ok_to_continue?
- raise TooManyRedirects if redirect_limit < 0
- end
-
- def redirect_url(response)
- if response['location'].nil?
- body_match = (response.body || "").match(/<a href=\"([^>]+)\">/i)
- raise MissingRedirectLocation unless body_match
- Addressable::URI.parse(body_match[0])
- else
- redirect_uri = Addressable::URI.parse(response['location'])
- redirect_uri.relative? ? Addressable::URI.parse(response.uri).join(response['location']) : redirect_uri
- end
- end
-
- def meta_refresh?(current_url, response)
- if Net::HTTPSuccess === response
- body_match = response.body.match(/<meta http-equiv=\"refresh\" content=\"0; URL=(.*?)\"\s*\/*>/i)
- if body_match
- uri = Addressable::URI.parse(body_match[1])
- make_url_absolute(current_url, uri)
- end
- end
- end
-
- def canonical_link?(response)
- body_match = response.body.match(/<link rel=[\'\"]canonical[\'\"] href=[\'\"](.*?)[\'\"]/i)
- body_match ? Addressable::URI.parse(body_match[1]).to_s : false
- end
-
- def apply_cookie(response, headers)
- if response.code.to_i == 302 && response['set-cookie']
- headers.merge("cookie" => CookieHash.to_cookie_string(response['set-cookie']))
- else
- #todo: should we delete the cookie at this point if it exists?
- headers
- end
- end
-
- def make_url_absolute(current_url, relative_url)
- current_uri = Addressable::URI.parse(current_url)
- if (relative_url.relative?)
- url = Addressable::URI.new(
- :scheme => current_uri.scheme,
- :user => current_uri.user,
- :password => current_uri.password,
- :host => current_uri.host,
- :port => current_uri.port,
- :path => relative_url.path,
- :query => relative_url.query,
- :fragment => relative_url.fragment)
- else
- relative_url
- end
- end
-
- end
-
- #borrowed (stolen) from HTTParty with minor updates
- #to handle all cookies existing in a single string
- class CookieHash < Hash
-
- CLIENT_COOKIES = %w{path expires domain path secure httponly}
-
- def add_cookies(value)
- case value
- when Hash
- merge!(value)
- when String
- value = value.gsub(/expires=[\w,\s\-\:]+;/i, '')
- value = value.gsub(/httponly[\,\;]*/i, '')
- value.split(/[;,]\s/).each do |cookie|
- array = cookie.split('=')
- self[array[0].strip.to_sym] = array[1]
- end
- else
- raise "add_cookies only takes a Hash or a String"
- end
- end
-
- def to_cookie_string
- delete_if { |k, v| CLIENT_COOKIES.include?(k.to_s.downcase) }.collect { |k, v| "#{k}=#{v}" }.join("; ")
- end
-
- def self.to_cookie_string(*cookie_strings)
- h = CookieHash.new
- cookie_strings.each do |cs|
- h.add_cookies(cs)
- end
-
- h.to_cookie_string
- end
- end
-
+ end
end