lib/mechanize/http/agent.rb in mechanize-2.5.1 vs lib/mechanize/http/agent.rb in mechanize-2.6.0

- old
+ new

@@ -67,10 +67,16 @@ attr_accessor :redirect_ok # Maximum number of redirects to follow attr_accessor :redirection_limit + # :section: Allowed error codes + + # List of error codes to handle without raising an exception. + + attr_accessor :allowed_error_codes + # :section: Robots # When true, this agent will consult the site's robots.txt for each access. attr_reader :robots @@ -118,10 +124,11 @@ # Creates a new Mechanize HTTP user agent. The user agent is an # implementation detail of mechanize and its API may change at any time. def initialize + @allowed_error_codes = [] @conditional_requests = true @context = nil @content_encoding_hooks = [] @cookie_jar = Mechanize::CookieJar.new @follow_meta_refresh = false @@ -237,16 +244,14 @@ if robots && uri.is_a?(URI::HTTP) robots_allowed?(uri) or raise Mechanize::RobotsDisallowedError.new(uri) end # Add If-Modified-Since if page is in history - page = visited_page(uri) + if page = visited_page(uri) and last_modified = page.response['Last-Modified'] + request['If-Modified-Since'] = last_modified + end if @conditional_requests - if (page = visited_page(uri)) and page.response['Last-Modified'] - request['If-Modified-Since'] = page.response['Last-Modified'] - end if(@conditional_requests) - # Specify timeouts if given connection.open_timeout = @open_timeout if @open_timeout connection.read_timeout = @read_timeout if @read_timeout request_log request @@ -281,16 +286,16 @@ response_cookies response, uri, page meta = response_follow_meta_refresh response, uri, page, redirects return meta if meta + if robots && page.is_a?(Mechanize::Page) + page.parser.noindex? and raise Mechanize::RobotsDisallowedError.new(uri) + end + case response when Net::HTTPSuccess - if robots && page.is_a?(Mechanize::Page) - page.parser.noindex? and raise Mechanize::RobotsDisallowedError.new(uri) - end - page when Mechanize::FileResponse page when Net::HTTPNotModified log.debug("Got cached page") if log @@ -299,11 +304,15 @@ response_redirect response, method, page, redirects, headers, referer when Net::HTTPUnauthorized response_authenticate(response, page, uri, request, headers, params, referer) else - raise Mechanize::ResponseCodeError.new(page, 'unhandled response') + if @allowed_error_codes.any? {|code| code.to_s == page.code} then + page + else + raise Mechanize::ResponseCodeError.new(page, 'unhandled response') + end end end # URI for a proxy connection @@ -401,10 +410,15 @@ when 'file' then return Mechanize::FileConnection.new end end + # Closes all open connections for this agent. + def shutdown + http.shutdown + end + ## # Decodes a gzip-encoded +body_io+. If it cannot be decoded, inflate is # tried followed by raising an error. def content_encoding_gunzip body_io @@ -429,11 +443,11 @@ log.error "unable to inflate response: #{e} (#{e.class})" if log raise end ensure # do not close a second time if we failed the first time - zio.close if zio and not (zio.closed? or gz_error) + zio.close if zio and !(zio.closed? or gz_error) body_io.close unless body_io.closed? end ## # Decodes a deflate-encoded +body_io+. If it cannot be decoded, raw inflate @@ -620,10 +634,12 @@ rescue uri = URI.parse(WEBrick::HTTPUtils.escape(escaped_url)) end end + uri.host = referer_uri.host if referer_uri && URI::HTTP === uri && uri.host.nil? + scheme = uri.relative? ? 'relative' : uri.scheme.downcase uri = @scheme_handlers[scheme].call(uri, referer) if referer_uri if uri.path.length == 0 && uri.relative? @@ -694,11 +710,11 @@ unless www_authenticate = response['www-authenticate'] then message = 'WWW-Authenticate header missing in response' raise Mechanize::UnauthorizedError.new(page, nil, message) end - + challenges = @authenticate_parser.parse www_authenticate unless @auth_store.credentials? uri, challenges then message = "no credentials found, provide some with #add_auth" raise Mechanize::UnauthorizedError.new(page, challenges, message) @@ -798,11 +814,11 @@ raise Mechanize::Error, message ensure begin if Tempfile === body_io and (StringIO === out_io or out_io.path != body_io.path) then - body_io.close! + body_io.close! end rescue IOError # HACK ruby 1.8 raises IOError when closing the stream end end @@ -843,11 +859,11 @@ redirects + 1 > @redirection_limit sleep delay @history.push(page, page.uri) fetch new_url, :get, {}, [], - Mechanize::Page.new, redirects + Mechanize::Page.new, redirects + 1 end def response_log response return unless log @@ -1145,11 +1161,11 @@ inflate.finish out_io ensure - inflate.close + inflate.close if inflate.finished? end def log @context.log end @@ -1196,9 +1212,14 @@ def use_tempfile? size return false unless @max_file_buffer return false unless size size >= @max_file_buffer + end + + def reset + @cookie_jar.clear! + @history.clear end end require 'mechanize/http/auth_store'