lib/mechanize/http/agent.rb in mechanize-2.5.1 vs lib/mechanize/http/agent.rb in mechanize-2.6.0
- old
+ new
@@ -67,10 +67,16 @@
attr_accessor :redirect_ok
# Maximum number of redirects to follow
attr_accessor :redirection_limit
+ # :section: Allowed error codes
+
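+ # List of HTTP response codes (each compared via +to_s+ against the page's
+ # code) for which an otherwise unhandled response is returned as a page
+ # instead of raising Mechanize::ResponseCodeError.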
+
+ attr_accessor :allowed_error_codes
+
# :section: Robots
# When true, this agent will consult the site's robots.txt for each access.
attr_reader :robots
@@ -118,10 +124,11 @@
# Creates a new Mechanize HTTP user agent. The user agent is an
# implementation detail of mechanize and its API may change at any time.
def initialize
+ @allowed_error_codes = []
@conditional_requests = true
@context = nil
@content_encoding_hooks = []
@cookie_jar = Mechanize::CookieJar.new
@follow_meta_refresh = false
@@ -237,16 +244,14 @@
if robots && uri.is_a?(URI::HTTP)
robots_allowed?(uri) or raise Mechanize::RobotsDisallowedError.new(uri)
end
# Add If-Modified-Since if page is in history
- page = visited_page(uri)
+ if page = visited_page(uri) and last_modified = page.response['Last-Modified']
+ request['If-Modified-Since'] = last_modified
+ end if @conditional_requests
- if (page = visited_page(uri)) and page.response['Last-Modified']
- request['If-Modified-Since'] = page.response['Last-Modified']
- end if(@conditional_requests)
-
# Specify timeouts if given
connection.open_timeout = @open_timeout if @open_timeout
connection.read_timeout = @read_timeout if @read_timeout
request_log request
@@ -281,16 +286,16 @@
response_cookies response, uri, page
meta = response_follow_meta_refresh response, uri, page, redirects
return meta if meta
+ if robots && page.is_a?(Mechanize::Page)
+ page.parser.noindex? and raise Mechanize::RobotsDisallowedError.new(uri)
+ end
+
case response
when Net::HTTPSuccess
- if robots && page.is_a?(Mechanize::Page)
- page.parser.noindex? and raise Mechanize::RobotsDisallowedError.new(uri)
- end
-
page
when Mechanize::FileResponse
page
when Net::HTTPNotModified
log.debug("Got cached page") if log
@@ -299,11 +304,15 @@
response_redirect response, method, page, redirects, headers, referer
when Net::HTTPUnauthorized
response_authenticate(response, page, uri, request, headers, params,
referer)
else
- raise Mechanize::ResponseCodeError.new(page, 'unhandled response')
+ if @allowed_error_codes.any? {|code| code.to_s == page.code} then
+ page
+ else
+ raise Mechanize::ResponseCodeError.new(page, 'unhandled response')
+ end
end
end
# URI for a proxy connection
@@ -401,10 +410,15 @@
when 'file' then
return Mechanize::FileConnection.new
end
end
+ # Closes all open connections for this agent.
+ def shutdown
+ http.shutdown # delegates to #http, which is defined outside this excerpt (presumably the agent's connection object; confirm)
+ end
+
##
# Decodes a gzip-encoded +body_io+. If it cannot be decoded, inflate is
# tried followed by raising an error.
def content_encoding_gunzip body_io
@@ -429,11 +443,11 @@
log.error "unable to inflate response: #{e} (#{e.class})" if log
raise
end
ensure
# do not close a second time if we failed the first time
- zio.close if zio and not (zio.closed? or gz_error)
+ zio.close if zio and !(zio.closed? or gz_error)
body_io.close unless body_io.closed?
end
##
# Decodes a deflate-encoded +body_io+. If it cannot be decoded, raw inflate
@@ -620,10 +634,12 @@
rescue
uri = URI.parse(WEBrick::HTTPUtils.escape(escaped_url))
end
end
+ uri.host = referer_uri.host if referer_uri && URI::HTTP === uri && uri.host.nil?
+
scheme = uri.relative? ? 'relative' : uri.scheme.downcase
uri = @scheme_handlers[scheme].call(uri, referer)
if referer_uri
if uri.path.length == 0 && uri.relative?
@@ -694,11 +710,11 @@
unless www_authenticate = response['www-authenticate'] then
message = 'WWW-Authenticate header missing in response'
raise Mechanize::UnauthorizedError.new(page, nil, message)
end
-
+
challenges = @authenticate_parser.parse www_authenticate
unless @auth_store.credentials? uri, challenges then
message = "no credentials found, provide some with #add_auth"
raise Mechanize::UnauthorizedError.new(page, challenges, message)
@@ -798,11 +814,11 @@
raise Mechanize::Error, message
ensure
begin
if Tempfile === body_io and
(StringIO === out_io or out_io.path != body_io.path) then
- body_io.close!
+ body_io.close!
end
rescue IOError
# HACK ruby 1.8 raises IOError when closing the stream
end
end
@@ -843,11 +859,11 @@
redirects + 1 > @redirection_limit
sleep delay
@history.push(page, page.uri)
fetch new_url, :get, {}, [],
- Mechanize::Page.new, redirects
+ Mechanize::Page.new, redirects + 1
end
def response_log response
return unless log
@@ -1145,11 +1161,11 @@
inflate.finish
out_io
ensure
- inflate.close
+ inflate.close if inflate.finished?
end
##
# Returns the logger exposed by this agent's context, or nil when @context
# is nil (the value #initialize assigns). Call sites in this file guard
# with `if log` / `return unless log`, so a missing context must read as
# "no logger" rather than raise NoMethodError.
def log
  # Plain && keeps this compatible with rubies that lack the &. operator.
  @context && @context.log
end
@@ -1196,9 +1212,14 @@
# Reports whether +size+ reaches the @max_file_buffer threshold.
#
# Returns false when @max_file_buffer is unset (nil/false) or when +size+
# is nil/false; otherwise returns the result of size >= @max_file_buffer.
# NOTE(review): judging by the name, callers presumably use this to choose
# a Tempfile over an in-memory buffer -- callers are outside this excerpt.
def use_tempfile? size
# No threshold configured: never report true.
return false unless @max_file_buffer
# Unknown size cannot be compared against the threshold.
return false unless size
size >= @max_file_buffer
+ end
+
+ def reset # Calls clear! on the cookie jar and clear on the page history.
+ @cookie_jar.clear! # @cookie_jar is the Mechanize::CookieJar assigned in #initialize
+ @history.clear # the same @history that pages are pushed onto elsewhere in this file
end
end
require 'mechanize/http/auth_store'