lib/mechanize/http/agent.rb in mechanize-2.1 vs lib/mechanize/http/agent.rb in mechanize-2.1.1

- old
+ new

@@ -45,10 +45,11 @@ attr_reader :authenticate_methods # :nodoc: attr_reader :digest_challenges # :nodoc: attr_accessor :user attr_accessor :password + attr_accessor :domain # :section: Redirection # Follow HTML meta refresh and HTTP Refresh. If set to +:anywhere+ meta # refresh tags outside of the head element will be followed. @@ -154,11 +155,11 @@ @cookie_jar = Mechanize::CookieJar.new @follow_meta_refresh = false @follow_meta_refresh_self = false @gzip_enabled = true @history = Mechanize::History.new - @idle_timeout = nil + @idle_timeout = 5 @keep_alive = true @keep_alive_time = 300 @max_file_buffer = 10240 @open_timeout = nil @post_connect_hooks = [] @@ -182,10 +183,11 @@ end @digest_auth = Net::HTTP::DigestAuth.new @digest_challenges = {} @password = nil # HTTP auth password @user = nil # HTTP auth user + @domain = nil # NTLM HTTP domain # SSL @ca_file = nil @cert = nil @cert_store = nil @@ -262,11 +264,11 @@ # Send the request response = connection.request(uri, request) { |res| response_log res - response_body_io = response_read res, request + response_body_io = response_read res, request, uri res } hook_content_encoding response, uri, response_body_io @@ -390,10 +392,66 @@ when 'file' then return Mechanize::FileConnection.new end end + ## + # Decodes a gzip-encoded +body_io+. If it cannot be decoded, inflate is + # tried followed by raising an error. + + def content_encoding_gunzip body_io + log.debug('gzip response') if log + + zio = Zlib::GzipReader.new body_io + out_io = Tempfile.new 'mechanize-decode' + out_io.unlink + out_io.binmode + + until zio.eof? do + out_io.write zio.read 16384 + end + + zio.finish + + return out_io + rescue Zlib::Error + log.error('unable to gunzip response, trying raw inflate') if log + + body_io.rewind + body_io.read 10 + + begin + return inflate body_io, -Zlib::MAX_WBITS + rescue Zlib::Error => e + log.error("unable to gunzip response: #{e}") if log + raise + end + ensure + zio.close if zio and not zio.closed? + end + + ## + # Decodes a deflate-encoded +body_io+. If it cannot be decoded, raw inflate + # is tried followed by raising an error. + + def content_encoding_inflate body_io + log.debug('deflate body') if log + + return inflate body_io + rescue Zlib::Error + log.error('unable to inflate response, trying raw deflate') if log + + body_io.rewind + + begin + return inflate body_io, -Zlib::MAX_WBITS + rescue Zlib::Error => e + log.error("unable to inflate response: #{e}") if log + raise + end + end + def disable_keep_alive request request['connection'] = 'close' unless @keep_alive end def enable_gzip request @@ -489,15 +547,21 @@ request.each_header do |k, v| log.debug("request-header: #{k} => #{v}") end end + # Sets a Referer header. Fragment part is removed as demanded by + # RFC 2616 14.36, and user information part is removed just like + # major browsers do. def request_referer request, uri, referer return unless referer return if 'https' == referer.scheme.downcase and 'https' != uri.scheme.downcase - + if referer.fragment || referer.user || referer.password + referer = referer.dup + referer.fragment = referer.user = referer.password = nil + end request['Referer'] = referer end def request_user_agent request request['User-Agent'] = @user_agent if @user_agent @@ -600,12 +664,16 @@ def response_authenticate(response, page, uri, request, headers, params, referer) raise Mechanize::UnauthorizedError, page unless @user || @password - challenges = @authenticate_parser.parse response['www-authenticate'] + www_authenticate = response['www-authenticate'] + raise Mechanize::UnauthorizedError, page unless www_authenticate + + challenges = @authenticate_parser.parse www_authenticate + if challenge = challenges.find { |c| c.scheme =~ /^Digest$/i } then realm = challenge.realm uri auth_scheme = if response['server'] =~ /Microsoft-IIS/ then :iis_digest @@ -629,11 +697,11 @@ existing_realms << realm if challenge.params then type_2 = Net::NTLM::Message.decode64 challenge.params - type_3 = type_2.response({ :user => @user, :password => @password, }, + type_3 = type_2.response({ :user => @user, :password => @password, :domain => @domain }, { :ntlmv2 => true }).encode64 headers['Authorization'] = "NTLM #{type_3}" else type_1 = Net::NTLM::Message::Type1.new.encode64 @@ -654,75 +722,46 @@ fetch uri, request.method.downcase.to_sym, headers, params, referer end def response_content_encoding response, body_io - length = response.content_length - - length = case body_io - when IO, Tempfile then - body_io.stat.size - else - body_io.length - end unless length - - out_io = nil - - case response['Content-Encoding'] - when nil, 'none', '7bit' then - out_io = body_io - when 'deflate' then - log.debug('deflate body') if log - - return if length.zero? - - begin - out_io = inflate body_io - rescue Zlib::BufError, Zlib::DataError - log.error('Unable to inflate page, retrying with raw deflate') if log - body_io.rewind - begin - out_io = inflate body_io, -Zlib::MAX_WBITS - rescue Zlib::BufError, Zlib::DataError - log.error("unable to inflate page: #{$!}") if log - nil - end + length = response.content_length || + case body_io + when Tempfile, IO then + body_io.stat.size + else + body_io.length end - when 'gzip', 'x-gzip' then - log.debug('gzip body') if log - return if length.zero? + return body_io if length.zero? - begin - zio = Zlib::GzipReader.new body_io - out_io = Tempfile.new 'mechanize-decode' - out_io.binmode + out_io = case response['Content-Encoding'] + when nil, 'none', '7bit' then + body_io + when 'deflate' then + content_encoding_inflate body_io + when 'gzip', 'x-gzip' then + content_encoding_gunzip body_io + else + raise Mechanize::Error, + "unsupported content-encoding: #{response['Content-Encoding']}" + end - until zio.eof? do - out_io.write zio.read 16384 - end - rescue Zlib::BufError, Zlib::GzipFile::Error - log.error('Unable to gunzip body, trying raw inflate') if log - body_io.rewind - body_io.read 10 - - out_io = inflate body_io, -Zlib::MAX_WBITS - rescue Zlib::DataError - log.error("unable to gunzip page: #{$!}") if log - '' - ensure - zio.close if zio and not zio.closed? - end - else - raise Mechanize::Error, - "Unsupported Content-Encoding: #{response['Content-Encoding']}" - end - out_io.flush out_io.rewind out_io + rescue Zlib::Error => e + message = "error handling content-encoding #{response['Content-Encoding']}:" + message << " #{e.message} (#{e.class})" + raise Mechanize::Error, message + ensure + begin + body_io.close! if Tempfile === body_io and out_io.path != body_io.path + rescue IOError + # HACK ruby 1.8 raises IOError when closing the stream + end end def response_cookies response, uri, page if Mechanize::Page === page and page.body =~ /Set-Cookie/n page.search('//head/meta[@http-equiv="Set-Cookie"]').each do |meta| @@ -776,15 +815,16 @@ def response_parse response, body_io, uri @context.parse uri, response, body_io end - def response_read response, request + def response_read response, request, uri content_length = response.content_length if content_length and content_length > @max_file_buffer then body_io = Tempfile.new 'mechanize-raw' + body_io.unlink body_io.binmode if defined? body_io.binmode else body_io = StringIO.new end @@ -795,11 +835,12 @@ response.read_body { |part| total += part.length if StringIO === body_io and total > @max_file_buffer then new_io = Tempfile.new 'mechanize-raw' - new_io.binmode if defined? binmode + new_io.unlink + new_io.binmode new_io.write body_io.string body_io = new_io end @@ -807,10 +848,11 @@ body_io.write(part) log.debug("Read #{part.length} bytes (#{total} total)") if log } rescue Net::HTTP::Persistent::Error => e body_io.rewind - raise Mechanize::ResponseReadError.new(e, response, body_io) + raise Mechanize::ResponseReadError.new(e, response, body_io, uri, + @context) end body_io.flush body_io.rewind