lib/mechanize/http/agent.rb in mechanize-2.1 vs lib/mechanize/http/agent.rb in mechanize-2.1.1
- old
+ new
@@ -45,10 +45,11 @@
attr_reader :authenticate_methods # :nodoc:
attr_reader :digest_challenges # :nodoc:
attr_accessor :user
attr_accessor :password
+ attr_accessor :domain
# :section: Redirection
# Follow HTML meta refresh and HTTP Refresh. If set to +:anywhere+ meta
# refresh tags outside of the head element will be followed.
@@ -154,11 +155,11 @@
@cookie_jar = Mechanize::CookieJar.new
@follow_meta_refresh = false
@follow_meta_refresh_self = false
@gzip_enabled = true
@history = Mechanize::History.new
- @idle_timeout = nil
+ @idle_timeout = 5
@keep_alive = true
@keep_alive_time = 300
@max_file_buffer = 10240
@open_timeout = nil
@post_connect_hooks = []
@@ -182,10 +183,11 @@
end
@digest_auth = Net::HTTP::DigestAuth.new
@digest_challenges = {}
@password = nil # HTTP auth password
@user = nil # HTTP auth user
+ @domain = nil # NTLM HTTP domain
# SSL
@ca_file = nil
@cert = nil
@cert_store = nil
@@ -262,11 +264,11 @@
# Send the request
response = connection.request(uri, request) { |res|
response_log res
- response_body_io = response_read res, request
+ response_body_io = response_read res, request, uri
res
}
hook_content_encoding response, uri, response_body_io
@@ -390,10 +392,66 @@
when 'file' then
return Mechanize::FileConnection.new
end
end
+ ##
+ # Decodes a gzip-encoded +body_io+ into an unlinked, binary-mode Tempfile
+ # and returns it. If gzip decoding raises Zlib::Error, the partial output
+ # is discarded, +body_io+ is rewound, its first 10 bytes are skipped and the
+ # remainder is handed to #inflate with negative window bits. If that
+ # fails too, the Zlib::Error is logged and re-raised.
+
+ def content_encoding_gunzip body_io
+ log.debug('gzip response') if log
+
+ zio = Zlib::GzipReader.new body_io
+ out_io = Tempfile.new 'mechanize-decode'
+ out_io.unlink
+ out_io.binmode
+
+ until zio.eof? do
+ out_io.write zio.read 16384
+ end
+
+ zio.finish
+
+ return out_io
+ rescue Zlib::Error
+ log.error('unable to gunzip response, trying raw inflate') if log
+
+ # The fallback below builds its own result, so the partially written
+ # Tempfile is useless; close it now instead of leaving it open until GC.
+ # out_io is still nil when GzipReader.new itself raised.
+ out_io.close if out_io
+
+ body_io.rewind
+ body_io.read 10 # presumably skips a minimal gzip header -- TODO confirm
+
+ begin
+ return inflate body_io, -Zlib::MAX_WBITS
+ rescue Zlib::Error => e
+ log.error("unable to gunzip response: #{e}") if log
+ raise
+ end
+ ensure
+ # After a successful zio.finish the reader already reports closed.
+ zio.close if zio and not zio.closed?
+ end
+
+ ##
+ # Decodes a deflate-encoded +body_io+ via #inflate and returns the result. On Zlib::Error the
+ # body is rewound and inflated again with negative window bits; a second Zlib::Error is logged and re-raised.
+
+ def content_encoding_inflate body_io
+ log.debug('deflate body') if log
+
+ return inflate body_io # first attempt: no explicit window bits passed
+ rescue Zlib::Error
+ log.error('unable to inflate response, trying raw deflate') if log
+
+ body_io.rewind # start the second attempt from the beginning of the body
+
+ begin
+ return inflate body_io, -Zlib::MAX_WBITS # second attempt: negative window bits (the "raw deflate" of the log message)
+ rescue Zlib::Error => e
+ log.error("unable to inflate response: #{e}") if log
+ raise # re-raise the Zlib::Error from the second attempt
+ end
+ end
+
##
# Sets the 'connection' header of +request+ to 'close' when @keep_alive is
# false or nil; otherwise +request+ is left untouched.
def disable_keep_alive request
request['connection'] = 'close' unless @keep_alive
end
def enable_gzip request
@@ -489,15 +547,21 @@
request.each_header do |k, v|
log.debug("request-header: #{k} => #{v}")
end
end
+ # Sets the Referer header on +request+ unless +referer+ is nil or is https while +uri+ is not.
+ # The fragment part is removed as demanded by RFC 2616 14.36, and the user information part
+ # (user and password) is removed just like major browsers do; the stripping is done on a dup.
def request_referer request, uri, referer
return unless referer # no referer, nothing to send
return if 'https' == referer.scheme.downcase and
'https' != uri.scheme.downcase # never send an https referer to a non-https uri
-
+ if referer.fragment || referer.user || referer.password
+ referer = referer.dup # work on a copy so the caller's object keeps its fragment and credentials
+ referer.fragment = referer.user = referer.password = nil # chained assignment runs right to left: password, user, fragment
+ end
request['Referer'] = referer
end
def request_user_agent request
request['User-Agent'] = @user_agent if @user_agent
@@ -600,12 +664,16 @@
def response_authenticate(response, page, uri, request, headers, params,
referer)
raise Mechanize::UnauthorizedError, page unless @user || @password
- challenges = @authenticate_parser.parse response['www-authenticate']
+ www_authenticate = response['www-authenticate']
+ raise Mechanize::UnauthorizedError, page unless www_authenticate
+
+ challenges = @authenticate_parser.parse www_authenticate
+
if challenge = challenges.find { |c| c.scheme =~ /^Digest$/i } then
realm = challenge.realm uri
auth_scheme = if response['server'] =~ /Microsoft-IIS/ then
:iis_digest
@@ -629,11 +697,11 @@
existing_realms << realm
if challenge.params then
type_2 = Net::NTLM::Message.decode64 challenge.params
- type_3 = type_2.response({ :user => @user, :password => @password, },
+ type_3 = type_2.response({ :user => @user, :password => @password, :domain => @domain },
{ :ntlmv2 => true }).encode64
headers['Authorization'] = "NTLM #{type_3}"
else
type_1 = Net::NTLM::Message::Type1.new.encode64
@@ -654,75 +722,46 @@
fetch uri, request.method.downcase.to_sym, headers, params, referer
end
+ ##
+ # Decodes +body_io+ according to the Content-Encoding header of +response+
+ # and returns the decoded IO, flushed and rewound.
+ #
+ # A zero-length body is returned as is. With no encoding, 'none' or '7bit'
+ # +body_io+ itself is returned; 'deflate' goes through
+ # #content_encoding_inflate and 'gzip'/'x-gzip' through
+ # #content_encoding_gunzip. Any other encoding raises Mechanize::Error, and
+ # a Zlib::Error raised while decoding is re-raised as Mechanize::Error.
+ #
+ # When +body_io+ is a Tempfile and decoding produced a different IO, the
+ # raw Tempfile is closed before returning.
def response_content_encoding response, body_io
- length = response.content_length
-
- length = case body_io
- when IO, Tempfile then
- body_io.stat.size
- else
- body_io.length
- end unless length
-
- out_io = nil
-
- case response['Content-Encoding']
- when nil, 'none', '7bit' then
- out_io = body_io
- when 'deflate' then
- log.debug('deflate body') if log
-
- return if length.zero?
-
- begin
- out_io = inflate body_io
- rescue Zlib::BufError, Zlib::DataError
- log.error('Unable to inflate page, retrying with raw deflate') if log
- body_io.rewind
- begin
- out_io = inflate body_io, -Zlib::MAX_WBITS
- rescue Zlib::BufError, Zlib::DataError
- log.error("unable to inflate page: #{$!}") if log
- nil
- end
+ length = response.content_length ||
+ case body_io
+ when Tempfile, IO then
+ body_io.stat.size
+ else
+ body_io.length
end
- when 'gzip', 'x-gzip' then
- log.debug('gzip body') if log
- return if length.zero?
+ return body_io if length.zero?
- begin
- zio = Zlib::GzipReader.new body_io
- out_io = Tempfile.new 'mechanize-decode'
- out_io.binmode
+ out_io = case response['Content-Encoding']
+ when nil, 'none', '7bit' then
+ body_io
+ when 'deflate' then
+ content_encoding_inflate body_io
+ when 'gzip', 'x-gzip' then
+ content_encoding_gunzip body_io
+ else
+ raise Mechanize::Error,
+ "unsupported content-encoding: #{response['Content-Encoding']}"
+ end
- until zio.eof? do
- out_io.write zio.read 16384
- end
- rescue Zlib::BufError, Zlib::GzipFile::Error
- log.error('Unable to gunzip body, trying raw inflate') if log
- body_io.rewind
- body_io.read 10
-
- out_io = inflate body_io, -Zlib::MAX_WBITS
- rescue Zlib::DataError
- log.error("unable to gunzip page: #{$!}") if log
- ''
- ensure
- zio.close if zio and not zio.closed?
- end
- else
- raise Mechanize::Error,
- "Unsupported Content-Encoding: #{response['Content-Encoding']}"
- end
-
out_io.flush
out_io.rewind
out_io
+ rescue Zlib::Error => e
+ message = "error handling content-encoding #{response['Content-Encoding']}:"
+ message << " #{e.message} (#{e.class})"
+ raise Mechanize::Error, message
+ ensure
+ begin
+ # out_io is nil on the zero-length early return and whenever an error
+ # was raised above, so it must be checked before use; calling a method
+ # on nil here would replace the real error with a NoMethodError.
+ # Compare object identity rather than paths: Tempfile#path is nil once
+ # the file has been unlinked, which makes two distinct unlinked
+ # tempfiles look equal and leaves the raw body open.
+ body_io.close! if Tempfile === body_io && out_io && !out_io.equal?(body_io)
+ rescue IOError
+ # HACK ruby 1.8 raises IOError when closing the stream
+ end
end
def response_cookies response, uri, page
if Mechanize::Page === page and page.body =~ /Set-Cookie/n
page.search('//head/meta[@http-equiv="Set-Cookie"]').each do |meta|
@@ -776,15 +815,16 @@
##
# Hands +uri+, +response+ and +body_io+ to @context.parse and returns
# whatever that call returns.
def response_parse response, body_io, uri
@context.parse uri, response, body_io # argument order differs from this method's own signature
end
- def response_read response, request
+ def response_read response, request, uri
content_length = response.content_length
if content_length and content_length > @max_file_buffer then
body_io = Tempfile.new 'mechanize-raw'
+ body_io.unlink
body_io.binmode if defined? body_io.binmode
else
body_io = StringIO.new
end
@@ -795,11 +835,12 @@
response.read_body { |part|
total += part.length
if StringIO === body_io and total > @max_file_buffer then
new_io = Tempfile.new 'mechanize-raw'
- new_io.binmode if defined? binmode
+ new_io.unlink
+ new_io.binmode
new_io.write body_io.string
body_io = new_io
end
@@ -807,10 +848,11 @@
body_io.write(part)
log.debug("Read #{part.length} bytes (#{total} total)") if log
}
rescue Net::HTTP::Persistent::Error => e
body_io.rewind
- raise Mechanize::ResponseReadError.new(e, response, body_io)
+ raise Mechanize::ResponseReadError.new(e, response, body_io, uri,
+ @context)
end
body_io.flush
body_io.rewind