lib/mechanize/http/agent.rb in mechanize-2.1.1 vs lib/mechanize/http/agent.rb in mechanize-2.2

- old
+ new

@@ -76,41 +76,15 @@ # When true, this agent will consult the site's robots.txt for each access. attr_reader :robots # :section: SSL - # Path to an OpenSSL server certificate file - attr_accessor :ca_file - - # An OpenSSL private key or the path to a private key - attr_accessor :key - - # An OpenSSL client certificate or the path to a certificate file. - attr_accessor :cert - - # An SSL certificate store - attr_accessor :cert_store - # OpenSSL key password attr_accessor :pass - # A callback for additional certificate verification. See - # OpenSSL::SSL::SSLContext#verify_callback - # - # The callback can be used for debugging or to ignore errors by always - # returning +true+. Specifying nil uses the default method that was valid - # when the SSLContext was created - attr_accessor :verify_callback - - # How to verify SSL connections. Defaults to VERIFY_PEER - attr_accessor :verify_mode - # :section: Timeouts - # Reset connections that have not been used in this many seconds - attr_reader :idle_timeout - # Set to false to disable HTTP/1.1 keep-alive requests attr_accessor :keep_alive # Length of time to wait until a connection is opened in seconds attr_accessor :open_timeout @@ -121,16 +95,10 @@ # :section: # The cookies for this agent attr_accessor :cookie_jar - # URI for a proxy connection - attr_reader :proxy_uri - - # Retry non-idempotent requests? - attr_reader :retry_change_requests - # Responses larger than this will be written to a Tempfile instead of stored # in memory. attr_accessor :max_file_buffer # :section: Utility @@ -155,23 +123,19 @@ @cookie_jar = Mechanize::CookieJar.new @follow_meta_refresh = false @follow_meta_refresh_self = false @gzip_enabled = true @history = Mechanize::History.new - @idle_timeout = 5 @keep_alive = true - @keep_alive_time = 300 @max_file_buffer = 10240 @open_timeout = nil @post_connect_hooks = [] @pre_connect_hooks = [] - @proxy_uri = nil @read_timeout = nil @redirect_ok = true @redirection_limit = 20 @request_headers = {} - @retry_change_requests = false @robots = false @user_agent = nil @webrobots = nil # HTTP Authentication @@ -186,17 +150,11 @@ @password = nil # HTTP auth password @user = nil # HTTP auth user @domain = nil # NTLM HTTP domain # SSL - @ca_file = nil - @cert = nil - @cert_store = nil - @key = nil - @pass = nil - @verify_callback = nil - @verify_mode = nil + @pass = nil @scheme_handlers = Hash.new { |h, scheme| h[scheme] = lambda { |link, page| raise Mechanize::UnsupportedSchemeError, scheme } @@ -204,10 +162,14 @@ @scheme_handlers['http'] = lambda { |link, page| link } @scheme_handlers['https'] = @scheme_handlers['http'] @scheme_handlers['relative'] = @scheme_handlers['http'] @scheme_handlers['file'] = @scheme_handlers['http'] + + @http = Net::HTTP::Persistent.new 'mechanize' + @http.idle_timeout = 5 + @http.keep_alive = 300 end # Retrieves +uri+ and parses it into a page or other object according to # PluggableParser. If the URI is an HTTP or HTTPS scheme URI the given HTTP # +method+ is used to retrieve it, along with the HTTP +headers+, request @@ -271,11 +233,12 @@ res } hook_content_encoding response, uri, response_body_io - response_body_io = response_content_encoding response, response_body_io + response_body_io = response_content_encoding response, response_body_io if + request.response_body_permitted? post_connect uri, response, response_body_io page = response_parse response, response_body_io, uri @@ -304,15 +267,25 @@ else raise Mechanize::ResponseCodeError.new(page), "Unhandled response" end end + # URI for a proxy connection + + def proxy_uri + @http.proxy_uri + end + + # Retry non-idempotent requests? + def retry_change_requests + @http.retry_change_requests + end + # Retry non-idempotent requests def retry_change_requests= retri - @retry_change_requests = retri - @http.retry_change_requests = retri if @http + @http.retry_change_requests = retri end # :section: Headers def user_agent= user_agent @@ -566,61 +539,71 @@ def request_user_agent request request['User-Agent'] = @user_agent if @user_agent end def resolve(uri, referer = current_page) - uri = uri.dup if uri.is_a?(URI) + referer_uri = referer && referer.uri + if uri.is_a?(URI) + uri = uri.dup + elsif uri.nil? + if referer_uri + return referer_uri + end + raise ArgumentError, "absolute URL needed (not nil)" + else + url = uri.to_s.strip + if url.empty? + if referer_uri + return referer_uri.dup.tap { |u| u.fragment = nil } + end + raise ArgumentError, "absolute URL needed (not #{uri.inspect})" + end - unless uri.is_a?(URI) - uri = uri.to_s.strip.gsub(/[^#{0.chr}-#{126.chr}]/o) { |match| + url.gsub!(/[^#{0.chr}-#{126.chr}]/o) { |match| if RUBY_VERSION >= "1.9.0" Mechanize::Util.uri_escape(match) else sprintf('%%%X', match.unpack($KCODE == 'UTF8' ? 'U' : 'C')[0]) end } - unescaped = uri.split(/(?:%[0-9A-Fa-f]{2})+|#/) - escaped = uri.scan(/(?:%[0-9A-Fa-f]{2})+|#/) - - escaped_uri = Mechanize::Util.html_unescape( - unescaped.zip(escaped).map { |x,y| + escaped_url = Mechanize::Util.html_unescape( + url.split(/((?:%[0-9A-Fa-f]{2})+|#)/).each_slice(2).map { |x, y| "#{WEBrick::HTTPUtils.escape(x)}#{y}" }.join('') ) begin - uri = URI.parse(escaped_uri) + uri = URI.parse(escaped_url) rescue - uri = URI.parse(WEBrick::HTTPUtils.escape(escaped_uri)) + uri = URI.parse(WEBrick::HTTPUtils.escape(escaped_url)) end end scheme = uri.relative? ? 'relative' : uri.scheme.downcase uri = @scheme_handlers[scheme].call(uri, referer) - if referer && referer.uri + if referer_uri if uri.path.length == 0 && uri.relative? - uri.path = referer.uri.path + uri.path = referer_uri.path end end uri.path = '/' if uri.path.length == 0 if uri.relative? raise ArgumentError, "absolute URL needed (not #{uri})" unless - referer && referer.uri + referer_uri - base = nil - if referer.respond_to?(:bases) && referer.parser - base = referer.bases.last + if referer.respond_to?(:bases) && referer.parser && + (lbase = referer.bases.last) && lbase.uri && lbase.uri.absolute? + base = lbase + else + base = nil end - uri = ((base && base.uri && base.uri.absolute?) ? - base.uri : - referer.uri) + uri - uri = referer.uri + uri + uri = referer_uri + (base ? base.uri : referer_uri) + uri # Strip initial "/.." bits from the path uri.path.sub!(/^(\/\.\.)+(?=\/)/, '') end unless ['http', 'https', 'file'].include?(uri.scheme.downcase) @@ -789,11 +772,12 @@ } end def response_follow_meta_refresh response, uri, page, redirects delay, new_url = get_meta_refresh(response, uri, page) - return nil unless new_url + return nil unless delay + new_url = new_url ? resolve(new_url, page) : uri raise Mechanize::RedirectLimitReachedError.new(page, redirects) if redirects + 1 > @redirection_limit sleep delay @@ -886,13 +870,12 @@ raise Mechanize::RedirectLimitReachedError.new(page, redirects) if redirects + 1 > @redirection_limit redirect_method = method == :head ? :head : :get - from_uri = page.uri - @history.push(page, from_uri) - new_uri = from_uri + response['Location'].to_s + @history.push(page, page.uri) + new_uri = resolve response['Location'].to_s, page fetch new_uri, redirect_method, {}, [], referer, redirects + 1 end # :section: Robots @@ -947,20 +930,108 @@ @webrobots ||= WebRobots.new(@user_agent, :http_get => method(:get_robots)) end # :section: SSL + # Path to an OpenSSL CA certificate file + def ca_file + @http.ca_file + end + + # Sets the path to an OpenSSL CA certificate file + def ca_file= ca_file + @http.ca_file = ca_file + end + + # The SSL certificate store used for validating connections + def cert_store + @http.cert_store + end + + # Sets the SSL certificate store used for validating connections + def cert_store= cert_store + @http.cert_store = cert_store + end + + # The client X509 certificate def certificate @http.certificate end + # Sets the client certificate to given X509 certificate. If a path is given + # the certificate will be loaded and set. + def certificate= certificate + certificate = if OpenSSL::X509::Certificate === certificate then + certificate + else + OpenSSL::X509::Certificate.new File.read certificate + end + + @http.certificate = certificate + end + + # An OpenSSL private key or the path to a private key + def private_key + @http.private_key + end + + # Sets the client's private key + def private_key= private_key + private_key = if OpenSSL::PKey::PKey === private_key then + private_key + else + OpenSSL::PKey::RSA.new File.read(private_key), @pass + end + + @http.private_key = private_key + end + + # SSL version to use + def ssl_version + @http.ssl_version + end if RUBY_VERSION > '1.9' + + # Sets the SSL version to use + def ssl_version= ssl_version + @http.ssl_version = ssl_version + end if RUBY_VERSION > '1.9' + + # A callback for additional certificate verification. See + # OpenSSL::SSL::SSLContext#verify_callback + # + # The callback can be used for debugging or to ignore errors by always + # returning +true+. Specifying nil uses the default method that was valid + # when the SSLContext was created + def verify_callback + @http.verify_callback + end + + # Sets the certificate verify callback + def verify_callback= verify_callback + @http.verify_callback = verify_callback + end + + # How to verify SSL connections. Defaults to VERIFY_PEER + def verify_mode + @http.verify_mode + end + + # Sets the mode for verifying SSL connections + def verify_mode= verify_mode + @http.verify_mode = verify_mode + end + # :section: Timeouts - # Sets the conection idle timeout for persistent connections + # Reset connections that have not been used in this many seconds + def idle_timeout + @http.idle_timeout + end + + # Sets the connection idle timeout for persistent connections def idle_timeout= timeout - @idle_timeout = timeout - @http.idle_timeout = timeout if @http + @http.idle_timeout = timeout end # :section: Utility def inflate compressed, window_bits = nil @@ -978,52 +1049,22 @@ def log @context.log end - def set_http - @http = Net::HTTP::Persistent.new 'mechanize', @proxy_uri - - @http.keep_alive = @keep_alive_time - @http.idle_timeout = @idle_timeout if @idle_timeout - @http.retry_change_requests = @retry_change_requests - - @http.ca_file = @ca_file - @http.cert_store = @cert_store if @cert_store - @http.verify_callback = @verify_callback - @http.verify_mode = @verify_mode if @verify_mode - - # update our cached value - @verify_mode = @http.verify_mode - @cert_store = @http.cert_store - - if @cert and @key then - cert = if OpenSSL::X509::Certificate === @cert then - @cert - else - OpenSSL::X509::Certificate.new ::File.read @cert - end - - key = if OpenSSL::PKey::PKey === @key then - @key - else - OpenSSL::PKey::RSA.new ::File.read(@key), @pass - end - - @http.certificate = cert - @http.private_key = key - end - end - ## # Sets the proxy address, port, user, and password +addr+ should be a host, # with no "http://", +port+ may be a port number, service name or port # number string. - def set_proxy(addr, port, user = nil, pass = nil) - return unless addr and port + def set_proxy addr, port, user = nil, pass = nil + unless addr and port then + @http.proxy = nil + return + end + unless Integer === port then begin port = Socket.getservbyname port rescue SocketError begin @@ -1032,15 +1073,15 @@ raise ArgumentError, "invalid value for port: #{port.inspect}" end end end - @proxy_uri = URI "http://#{addr}" - @proxy_uri.port = port - @proxy_uri.user = user if user - @proxy_uri.password = pass if pass + proxy_uri = URI "http://#{addr}" + proxy_uri.port = port + proxy_uri.user = user if user + proxy_uri.password = pass if pass - @proxy_uri + @http.proxy = proxy_uri end end