lib/mechanize/http/agent.rb in mechanize-2.7.3 vs lib/mechanize/http/agent.rb in mechanize-2.7.4
- old
+ new
@@ -165,11 +165,11 @@
# SSL
@pass = nil
@scheme_handlers = Hash.new { |h, scheme|
h[scheme] = lambda { |link, page|
- raise Mechanize::UnsupportedSchemeError, scheme
+ raise Mechanize::UnsupportedSchemeError.new(scheme, link)
}
}
@scheme_handlers['http'] = lambda { |link, page| link }
@scheme_handlers['https'] = @scheme_handlers['http']
@@ -211,10 +211,14 @@
# Retrieves +uri+ and parses it into a page or other object according to
# PluggableParser. If the URI is an HTTP or HTTPS scheme URI the given HTTP
# +method+ is used to retrieve it, along with the HTTP +headers+, request
# +params+ and HTTP +referer+.
#
+ # The final URI to access is built with +uri+ and +params+, the
+ # latter of which is formatted into a string using
+ # Mechanize::Util.build_query_string, which see.
+ #
# +redirects+ tracks the number of redirects experienced when retrieving the
# page. If it is over the redirection_limit an error will be raised.
def fetch uri, method = :get, headers = {}, params = [],
referer = current_page, redirects = 0
@@ -244,13 +248,17 @@
# Add If-Modified-Since if page is in history
if page = visited_page(uri) and last_modified = page.response['Last-Modified']
request['If-Modified-Since'] = last_modified
end if @conditional_requests
- # Specify timeouts if given
- connection.open_timeout = @open_timeout if @open_timeout
- connection.read_timeout = @read_timeout if @read_timeout
+ # Specify timeouts if supplied and our connection supports them
+ if @open_timeout && connection.respond_to?(:open_timeout=)
+ connection.open_timeout = @open_timeout
+ end
+ if @read_timeout && connection.respond_to?(:read_timeout=)
+ connection.read_timeout = @read_timeout
+ end
request_log request
response_body_io = nil
@@ -513,10 +521,12 @@
end
end
def request_auth request, uri
base_uri = uri + '/'
+ base_uri.user = nil
+ base_uri.password = nil
schemes = @authenticate_methods[base_uri]
if realm = schemes[:digest].find { |r| r.uri == base_uri } then
request_auth_digest request, uri, realm, base_uri, false
elsif realm = schemes[:iis_digest].find { |r| r.uri == base_uri } then
@@ -528,13 +538,11 @@
end
# Adds a digest Authorization header to +request+.
#
# The challenge previously stored for +realm+ is read from
# @digest_challenges.  Credentials are looked up in @auth_store using +uri+
# and the realm's name, then written onto +uri+ itself (the caller's object
# is mutated) before @digest_auth builds the header value.  +iis+ is passed
# straight through to @digest_auth.auth_header.  +base_uri+ is accepted but
# not referenced in this body.
def request_auth_digest request, uri, realm, base_uri, iis
challenge = @digest_challenges[realm]
# Old and new sides are equivalent: the first two values returned by
# credentials_for become uri.user and uri.password; the trailing comma
# discards any further values.
- user, password, = @auth_store.credentials_for uri, realm.realm
- uri.user = user
- uri.password = password
+ uri.user, uri.password, = @auth_store.credentials_for uri, realm.realm
auth = @digest_auth.auth_header uri, challenge.to_s, request.method, iis
request['Authorization'] = auth
end
@@ -606,21 +614,11 @@
end
raise ArgumentError, "absolute URL needed (not #{uri.inspect})"
end
url.gsub!(/[^#{0.chr}-#{126.chr}]/o) { |match|
- if RUBY_VERSION >= "1.9.0"
- Mechanize::Util.uri_escape(match)
- else
- begin
- sprintf('%%%X', match.unpack($KCODE == 'UTF8' ? 'U' : 'C').first)
- rescue ArgumentError
- # workaround for ruby 1.8 with -Ku but ISO-8859-1 characters in
- # URIs. See #227. I can't wait to drop 1.8 support
- sprintf('%%%X', match.unpack('C').first)
- end
- end
+ Mechanize::Util.uri_escape(match)
}
escaped_url = Mechanize::Util.html_unescape(
url.split(/((?:%[0-9A-Fa-f]{2})+|#)/).each_slice(2).map { |x, y|
"#{WEBrick::HTTPUtils.escape(x)}#{y}"
@@ -677,10 +675,22 @@
end
uri
end
# Resolves +uri+ relative to +referer+ using #resolve and returns the
# resolved URI.
#
# Raises Mechanize::Error when the referer has a URI whose scheme is not
# 'file' while the resolved URI's scheme is 'file'.  If +referer+ is
# nil/false, or its #uri is nil/false, no check is made and the resolved
# URI is returned as-is.
+ def secure_resolve!(uri, referer = current_page)
+ new_uri = resolve(uri, referer)
+
# referer_uri is only truthy when both referer and referer.uri are present,
# so the scheme comparisons below never run against nil.
+ if (referer_uri = referer && referer.uri) &&
+ referer_uri.scheme != 'file'.freeze &&
+ new_uri.scheme == 'file'.freeze
+ raise Mechanize::Error, "insecure redirect to a file URI"
+ end
+
+ new_uri
+ end
+
def resolve_parameters uri, method, parameters
case method
when :head, :get, :delete, :trace then
if parameters and parameters.length > 0
uri.query ||= ''
@@ -738,11 +748,11 @@
existing_realms = @authenticate_methods[realm.uri][auth_scheme]
if existing_realms.include? realm
message = 'Digest authentication failed'
- raise Mechanize::UnauthorizedError.new(page, challeges, message)
+ raise Mechanize::UnauthorizedError.new(page, challenges, message)
end
existing_realms << realm
@digest_challenges[realm] = challenge
elsif challenge = challenges.find { |c| c.scheme == 'NTLM' } then
@@ -820,11 +830,11 @@
message << " #{e.message} (#{e.class})"
raise Mechanize::Error, message
ensure
begin
if Tempfile === body_io and
- (StringIO === out_io or out_io.path != body_io.path) then
+ (StringIO === out_io or (out_io and out_io.path != body_io.path)) then
body_io.close!
end
rescue IOError
# HACK ruby 1.8 raises IOError when closing the stream
end
@@ -859,11 +869,11 @@
end
def response_follow_meta_refresh response, uri, page, redirects
delay, new_url = get_meta_refresh(response, uri, page)
return nil unless delay
- new_url = new_url ? resolve(new_url, page) : uri
+ new_url = new_url ? secure_resolve!(new_url, page) : uri
raise Mechanize::RedirectLimitReachedError.new(page, redirects) if
redirects + 1 > @redirection_limit
sleep delay
@@ -891,14 +901,13 @@
content_length = response.content_length
if use_tempfile? content_length then
body_io = make_tempfile 'mechanize-raw'
else
- body_io = StringIO.new
+ body_io = StringIO.new.set_encoding(Encoding::BINARY)
end
- body_io.set_encoding Encoding::BINARY if body_io.respond_to? :set_encoding
total = 0
begin
response.read_body { |part|
total += part.length
@@ -968,12 +977,13 @@
# Make sure we are not copying over the POST headers from the original request
['Content-Length', 'Content-MD5', 'Content-Type'].each do |key|
headers.delete key
end
+ new_uri = secure_resolve! response['Location'].to_s, page
+
@history.push(page, page.uri)
- new_uri = resolve response['Location'].to_s, page
fetch new_uri, redirect_method, headers, [], referer, redirects + 1
end
# :section: Robots
@@ -1084,16 +1094,16 @@
end
# SSL version to use
# Returns @http.ssl_version.
#
# The old side wrapped the definition in a modifier
# <tt>if RUBY_VERSION > '1.9'</tt> (a String comparison), so the method was
# only defined when that held; the new side defines it unconditionally.
def ssl_version
@http.ssl_version
- end if RUBY_VERSION > '1.9'
+ end
# Sets the SSL version to use
# Assigns +ssl_version+ to @http.ssl_version.
#
# The old side defined this writer only when the String comparison
# <tt>RUBY_VERSION > '1.9'</tt> was true; the new side defines it
# unconditionally.
def ssl_version= ssl_version
@http.ssl_version = ssl_version
- end if RUBY_VERSION > '1.9'
+ end
# A callback for additional certificate verification. See
# OpenSSL::SSL::SSLContext#verify_callback
#
# The callback can be used for debugging or to ignore errors by always
@@ -1139,14 +1149,12 @@
#
# If a block is provided, each chunk of +input_io+ is yielded for further
# processing.
def auto_io name, read_size, input_io
- out_io = StringIO.new
+ out_io = StringIO.new.set_encoding(Encoding::BINARY)
- out_io.set_encoding Encoding::BINARY if out_io.respond_to? :set_encoding
-
until input_io.eof? do
if StringIO === out_io and use_tempfile? out_io.size then
new_io = make_tempfile name
new_io.write out_io.string
out_io = new_io
@@ -1214,10 +1222,10 @@
end
# Builds a Tempfile by passing +name+ to Tempfile.new, calls #unlink on it
# right away, switches it to binary mode and returns the Tempfile object.
def make_tempfile name
io = Tempfile.new name
# NOTE(review): unlinking immediately presumably keeps the file off the
# filesystem while the open handle stays usable -- confirm against the
# Tempfile documentation for the supported platforms.
io.unlink
# The old side guarded binmode with respond_to?; the new side calls it
# unconditionally.
- io.binmode if io.respond_to? :binmode
+ io.binmode
io
end
def use_tempfile? size
return false unless @max_file_buffer