lib/mechanize/http/agent.rb in mechanize-2.7.3 vs lib/mechanize/http/agent.rb in mechanize-2.7.4

- old
+ new

@@ -165,11 +165,11 @@ # SSL @pass = nil @scheme_handlers = Hash.new { |h, scheme| h[scheme] = lambda { |link, page| - raise Mechanize::UnsupportedSchemeError, scheme + raise Mechanize::UnsupportedSchemeError.new(scheme, link) } } @scheme_handlers['http'] = lambda { |link, page| link } @scheme_handlers['https'] = @scheme_handlers['http'] @@ -211,10 +211,14 @@ # Retrieves +uri+ and parses it into a page or other object according to # PluggableParser. If the URI is an HTTP or HTTPS scheme URI the given HTTP # +method+ is used to retrieve it, along with the HTTP +headers+, request # +params+ and HTTP +referer+. # + # The final URI to access is built with +uri+ and +params+, the + # latter of which is formatted into a string using + # Mechanize::Util.build_query_string, which see. + # # +redirects+ tracks the number of redirects experienced when retrieving the # page. If it is over the redirection_limit an error will be raised. def fetch uri, method = :get, headers = {}, params = [], referer = current_page, redirects = 0 @@ -244,13 +248,17 @@ # Add If-Modified-Since if page is in history if page = visited_page(uri) and last_modified = page.response['Last-Modified'] request['If-Modified-Since'] = last_modified end if @conditional_requests - # Specify timeouts if given - connection.open_timeout = @open_timeout if @open_timeout - connection.read_timeout = @read_timeout if @read_timeout + # Specify timeouts if supplied and our connection supports them + if @open_timeout && connection.respond_to?(:open_timeout=) + connection.open_timeout = @open_timeout + end + if @read_timeout && connection.respond_to?(:read_timeout=) + connection.read_timeout = @read_timeout + end request_log request response_body_io = nil @@ -513,10 +521,12 @@ end end def request_auth request, uri base_uri = uri + '/' + base_uri.user = nil + base_uri.password = nil schemes = @authenticate_methods[base_uri] if realm = schemes[:digest].find { |r| r.uri == base_uri } then request_auth_digest request, uri, realm, base_uri, false elsif realm = schemes[:iis_digest].find { |r| r.uri == base_uri } then @@ -528,13 +538,11 @@ end def request_auth_digest request, uri, realm, base_uri, iis challenge = @digest_challenges[realm] - user, password, = @auth_store.credentials_for uri, realm.realm - uri.user = user - uri.password = password + uri.user, uri.password, = @auth_store.credentials_for uri, realm.realm auth = @digest_auth.auth_header uri, challenge.to_s, request.method, iis request['Authorization'] = auth end @@ -606,21 +614,11 @@ end raise ArgumentError, "absolute URL needed (not #{uri.inspect})" end url.gsub!(/[^#{0.chr}-#{126.chr}]/o) { |match| - if RUBY_VERSION >= "1.9.0" - Mechanize::Util.uri_escape(match) - else - begin - sprintf('%%%X', match.unpack($KCODE == 'UTF8' ? 'U' : 'C').first) - rescue ArgumentError - # workaround for ruby 1.8 with -Ku but ISO-8859-1 characters in - # URIs. See #227. I can't wait to drop 1.8 support - sprintf('%%%X', match.unpack('C').first) - end - end + Mechanize::Util.uri_escape(match) } escaped_url = Mechanize::Util.html_unescape( url.split(/((?:%[0-9A-Fa-f]{2})+|#)/).each_slice(2).map { |x, y| "#{WEBrick::HTTPUtils.escape(x)}#{y}" @@ -677,10 +675,22 @@ end uri end + def secure_resolve!(uri, referer = current_page) + new_uri = resolve(uri, referer) + + if (referer_uri = referer && referer.uri) && + referer_uri.scheme != 'file'.freeze && + new_uri.scheme == 'file'.freeze + raise Mechanize::Error, "insecure redirect to a file URI" + end + + new_uri + end + def resolve_parameters uri, method, parameters case method when :head, :get, :delete, :trace then if parameters and parameters.length > 0 uri.query ||= '' @@ -738,11 +748,11 @@ existing_realms = @authenticate_methods[realm.uri][auth_scheme] if existing_realms.include? realm message = 'Digest authentication failed' - raise Mechanize::UnauthorizedError.new(page, challeges, message) + raise Mechanize::UnauthorizedError.new(page, challenges, message) end existing_realms << realm @digest_challenges[realm] = challenge elsif challenge = challenges.find { |c| c.scheme == 'NTLM' } then @@ -820,11 +830,11 @@ message << " #{e.message} (#{e.class})" raise Mechanize::Error, message ensure begin if Tempfile === body_io and - (StringIO === out_io or out_io.path != body_io.path) then + (StringIO === out_io or (out_io and out_io.path != body_io.path)) then body_io.close! end rescue IOError # HACK ruby 1.8 raises IOError when closing the stream end @@ -859,11 +869,11 @@ end def response_follow_meta_refresh response, uri, page, redirects delay, new_url = get_meta_refresh(response, uri, page) return nil unless delay - new_url = new_url ? resolve(new_url, page) : uri + new_url = new_url ? secure_resolve!(new_url, page) : uri raise Mechanize::RedirectLimitReachedError.new(page, redirects) if redirects + 1 > @redirection_limit sleep delay @@ -891,14 +901,13 @@ content_length = response.content_length if use_tempfile? content_length then body_io = make_tempfile 'mechanize-raw' else - body_io = StringIO.new + body_io = StringIO.new.set_encoding(Encoding::BINARY) end - body_io.set_encoding Encoding::BINARY if body_io.respond_to? :set_encoding total = 0 begin response.read_body { |part| total += part.length @@ -968,12 +977,13 @@ # Make sure we are not copying over the POST headers from the original request ['Content-Length', 'Content-MD5', 'Content-Type'].each do |key| headers.delete key end + new_uri = secure_resolve! response['Location'].to_s, page + @history.push(page, page.uri) - new_uri = resolve response['Location'].to_s, page fetch new_uri, redirect_method, headers, [], referer, redirects + 1 end # :section: Robots @@ -1084,16 +1094,16 @@ end # SSL version to use def ssl_version @http.ssl_version - end if RUBY_VERSION > '1.9' + end # Sets the SSL version to use def ssl_version= ssl_version @http.ssl_version = ssl_version - end if RUBY_VERSION > '1.9' + end # A callback for additional certificate verification. See # OpenSSL::SSL::SSLContext#verify_callback # # The callback can be used for debugging or to ignore errors by always @@ -1139,14 +1149,12 @@ # # If a block is provided, each chunk of +input_io+ is yielded for further # processing. def auto_io name, read_size, input_io - out_io = StringIO.new + out_io = StringIO.new.set_encoding(Encoding::BINARY) - out_io.set_encoding Encoding::BINARY if out_io.respond_to? :set_encoding - until input_io.eof? do if StringIO === out_io and use_tempfile? out_io.size then new_io = make_tempfile name new_io.write out_io.string out_io = new_io @@ -1214,10 +1222,10 @@ end def make_tempfile name io = Tempfile.new name io.unlink - io.binmode if io.respond_to? :binmode + io.binmode io end def use_tempfile? size return false unless @max_file_buffer