lib/mechanize.rb in mechanize-2.0.pre.2 vs lib/mechanize.rb in mechanize-2.0

- old
+ new

@@ -41,10 +41,11 @@
   ruby_version = if RUBY_PATCHLEVEL >= 0 then
                    "#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}"
                  else
                    "#{RUBY_VERSION}dev#{RUBY_REVISION}"
                  end

+  ##
   # User Agent aliases

   AGENT_ALIASES = {
     'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
@@ -59,104 +60,214 @@
     'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
     'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/tenderlove/mechanize/)"
   }

   # A Mechanize::CookieJar which stores cookies
-  attr_accessor :cookie_jar
+  def cookie_jar
+    @agent.cookie_jar
+  end
+
+  def cookie_jar= cookie_jar
+    @agent.cookie_jar = cookie_jar
+  end
+
   # Length of time to wait until a connection is opened in seconds
-  attr_accessor :open_timeout
+  def open_timeout
+    @agent.open_timeout
+  end
+
+  def open_timeout= open_timeout
+    @agent.open_timeout = open_timeout
+  end
+
   # Length of time to attempt to read data from the server
-  attr_accessor :read_timeout
+  def read_timeout
+    @agent.read_timeout
+  end
+
+  def read_timeout= read_timeout
+    @agent.read_timeout = read_timeout
+  end
+
   # The identification string for the client initiating a web request
-  attr_reader :user_agent
+  def user_agent
+    @agent.user_agent
+  end

   # The value of watch_for_set is passed to pluggable parsers for retrieved
   # content
   attr_accessor :watch_for_set

   # Path to an OpenSSL server certificate file
-  attr_accessor :ca_file
+  def ca_file
+    @agent.ca_file
+  end
+
+  def ca_file= ca_file
+    @agent.ca_file = ca_file
+  end
+
+  def certificate
+    @agent.certificate
+  end
+
   # An OpenSSL private key or the path to a private key
-  attr_accessor :key
+  def key
+    @agent.key
+  end
+
+  def key= key
+    @agent.key = key
+  end
+
   # An OpenSSL client certificate or the path to a certificate file.
-  attr_accessor :cert
+  def cert
+    @agent.cert
+  end
+
+  def cert= cert
+    @agent.cert = cert
+  end
+
   # OpenSSL key password
-  attr_accessor :pass
+  def pass
+    @agent.pass
+  end

-  # Controls how this agent deals with redirects. If it is set to
-  # true or :all, all 3xx redirects are automatically followed. This
-  # is the default behavior. If it is :permanent, only 301 (Moved
-  # Permanently) redirects are followed. If it is a false value, no
-  # redirects are followed.
-  attr_accessor :redirect_ok
+  def pass= pass
+    @agent.pass = pass
+  end

-  # Says this agent should consult the site's robots.txt for each access.
-  attr_reader :robots
+  # Controls how this agent deals with redirects. The following values are
+  # allowed:
+  #
+  # :all, true:: All 3xx redirects are followed (default)
+  # :permanent:: Only 301 Moved Permanantly redirects are followed
+  # false:: No redirects are followed

-  def robots=(value)
-    require 'webrobots' if value
-    @webrobots = nil if value != @robots
-    @robots = value
+  def redirect_ok
+    @agent.redirect_ok
   end

+  def redirect_ok= follow
+    @agent.redirect_ok = follow
+  end
+
+  def gzip_enabled
+    @agent.gzip_enabled
+  end
+
   # Disables HTTP/1.1 gzip compression (enabled by default)
-  attr_accessor :gzip_enabled
+  def gzip_enabled=enabled
+    @agent.gzip_enabled = enabled
+  end

   # HTTP/1.0 keep-alive time
-  attr_accessor :keep_alive_time
+  def keep_alive_time
+    @agent.keep_alive_time
+  end
+
+  def keep_alive_time= keep_alive_time
+    @agent.keep_alive_time = keep_alive_time
+  end

   # HTTP/1.1 keep-alives are always active. This does nothing.
   attr_accessor :keep_alive

+  def conditional_requests
+    @agent.conditional_requests
+  end
+
   # Disables If-Modified-Since conditional requests (enabled by default)
-  attr_accessor :conditional_requests
+  def conditional_requests= enabled
+    @agent.conditional_requests = enabled
+  end

-  # Follow HTML meta refresh
-  attr_accessor :follow_meta_refresh
+  # Follow HTML meta refresh. If set to +:anywhere+ meta refresh tags outside
+  # of the head element will be followed.
+  def follow_meta_refresh
+    @agent.follow_meta_refresh
+  end
+
+  def follow_meta_refresh= follow
+    @agent.follow_meta_refresh = follow
+  end

   # A callback for additional certificate verification. See
   # OpenSSL::SSL::SSLContext#verify_callback
-  attr_accessor :verify_callback
+  #
+  # The callback can be used for debugging or to ignore errors by always
+  # returning +true+. Specifying nil uses the default method that was valid
+  # when the SSLContext was created
+  def verify_callback
+    @agent.verify_callback
+  end
+
+  def verify_callback= verify_callback
+    @agent.verify_callback = verify_callback
+  end

   attr_accessor :history_added

-  attr_accessor :scheme_handlers
-  attr_accessor :redirection_limit
+  def redirection_limit
+    @agent.redirection_limit
+  end
+
+  def redirection_limit= limit
+    @agent.redirection_limit = limit
+  end
+
+  def scheme_handlers
+    @agent.scheme_handlers
+  end
+
+  def scheme_handlers= scheme_handlers
+    @agent.scheme_handlers = scheme_handlers
+  end

   # A hash of custom request headers
-  attr_accessor :request_headers
+  def request_headers
+    @agent.request_headers
+  end
+
+  def request_headers= request_headers
+    @agent.request_headers = request_headers
+  end

   # Proxy settings
   attr_reader :proxy_addr
   attr_reader :proxy_pass
   attr_reader :proxy_port
   attr_reader :proxy_user

   # The HTML parser to be used when parsing documents
   attr_accessor :html_parser

-  attr_reader :http # :nodoc:
+  attr_reader :agent # :nodoc:

-  attr_reader :history
+  def history
+    @agent.history
+  end

   attr_reader :pluggable_parser

   # A list of hooks to call after retrieving a response. Hooks are called with
   # the agent and the response returned.
-  attr_reader :post_connect_hooks
+  def post_connect_hooks
+    @agent.post_connect_hooks
+  end

   # A list of hooks to call before making a request. Hooks are called with
   # the agent and the request to be performed.
-  attr_reader :pre_connect_hooks
+  def pre_connect_hooks
+    @agent.pre_connect_hooks
+  end

-  alias :follow_redirect? :redirect_ok
+  alias follow_redirect? redirect_ok
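Every accessor above that was a plain attr_accessor in 2.0.pre.2 is now a hand-written reader/writer pair forwarding to the internal Mechanize::HTTP::Agent, so the Mechanize object and its agent always share one piece of state. A minimal sketch of that from calling code (the values are illustrative; `agent` is the :nodoc: reader added in this hunk):

    require 'mechanize'

    m = Mechanize.new

    m.redirect_ok = :permanent  # writer forwards to @agent.redirect_ok=
    m.agent.redirect_ok         # => :permanent, the same underlying state

    m.open_timeout = 5          # seconds to wait for a connection to open
    m.agent.open_timeout        # => 5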

   @html_parser = Nokogiri::HTML

   class << self
     attr_accessor :html_parser, :log
@@ -165,127 +276,82 @@
       child.log ||= log
       super
     end
   end

+  # A default encoding name used when parsing HTML parsing. When set it is
+  # used after any other encoding. The default is nil.
+
+  attr_accessor :default_encoding
+
+  # Overrides the encodings given by the HTTP server and the HTML page with
+  # the default_encoding when set to true.
+
+  attr_accessor :force_default_encoding
+
   def initialize
+    @agent = Mechanize::HTTP::Agent.new
+    @agent.context = self
+
     # attr_accessors
-    @cookie_jar = CookieJar.new
-    @log = nil
-    @open_timeout = nil
-    @read_timeout = nil
-    @user_agent = AGENT_ALIASES['Mechanize']
-    @watch_for_set = nil
-    @history_added = nil
-    @ca_file = nil # OpenSSL server certificate file
+    @agent.user_agent = AGENT_ALIASES['Mechanize']
+    @watch_for_set = nil
+    @history_added = nil

-    # callback for OpenSSL errors while verifying the server certificate
-    # chain, can be used for debugging or to ignore errors by always
-    # returning _true_
-    # specifying nil uses the default method that was valid when the SSL was created
-    @verify_callback = nil
-    @cert = nil # OpenSSL Certificate
-    @key = nil # OpenSSL Private Key
-    @pass = nil # OpenSSL Password
-    @redirect_ok = true
-    @gzip_enabled = true
-
     # attr_readers
-    @history = Mechanize::History.new
     @pluggable_parser = PluggableParser.new

-    # Auth variables
-    @user = nil # Auth User
-    @password = nil # Auth Password
-    @digest = nil # DigestAuth Digest
-    @digest_auth = Net::HTTP::DigestAuth.new
-    @auth_hash = {} # Keep track of urls for sending auth
-    @request_headers= {} # A hash of request headers to be used
-
-    @conditional_requests = true
-
-    @follow_meta_refresh = false
-    @redirection_limit = 20
-
-    @robots = false
-    @webrobots = nil
-
-    # Connection Cache & Keep alive
-    @keep_alive_time = 300
     @keep_alive = true

     # Proxy
     @proxy_addr = nil
     @proxy_port = nil
     @proxy_user = nil
     @proxy_pass = nil

-    @scheme_handlers = Hash.new { |h, scheme|
-      h[scheme] = lambda { |link, page|
-        raise Mechanize::UnsupportedSchemeError, scheme
-      }
-    }
+    @html_parser = self.class.html_parser

-    @scheme_handlers['http'] = lambda { |link, page| link }
-    @scheme_handlers['https'] = @scheme_handlers['http']
-    @scheme_handlers['relative'] = @scheme_handlers['http']
-    @scheme_handlers['file'] = @scheme_handlers['http']
+    @default_encoding = nil
+    @force_default_encoding = false

-    @pre_connect_hooks = []
-    @post_connect_hooks = []
+    yield self if block_given?

-    @html_parser = self.class.html_parser
+    @agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
+    @agent.set_http
+  end

-    yield self if block_given?
+  def max_history
+    @agent.history.max_size
+  end

-    if @proxy_addr and @proxy_pass then
-      set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
-    else
-      set_http
-    end
+  def max_history= length
+    @agent.history.max_size = length
   end

-  def max_history=(length); @history.max_size = length end
-  def max_history; @history.max_size end
   def log=(l); self.class.log = l end
   def log; self.class.log end

-  # Sets the proxy address, port, user, and password
-  # +addr+ should be a host, with no "http://"
-  def set_proxy(addr, port, user = nil, pass = nil)
-    proxy = URI.parse "http://#{addr}"
-    proxy.port = port
-    proxy.user = user if user
-    proxy.password = pass if pass
-
-    set_http proxy
-
-    nil
+  def user_agent= user_agent
+    @agent.user_agent = user_agent
   end

-  def user_agent=(value)
-    @webrobots = nil if value != @user_agent
-    @user_agent = value
-  end
-
-  # Set the user agent for the Mechanize object.
-  # See AGENT_ALIASES
+  # Set the user agent for the Mechanize object. See AGENT_ALIASES
   def user_agent_alias=(al)
-    @user_agent = AGENT_ALIASES[al] ||
-      raise(ArgumentError, "unknown agent alias")
+    self.user_agent = AGENT_ALIASES[al] ||
+      raise(ArgumentError, "unknown agent alias #{al.inspect}")
   end
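initialize now builds the Mechanize::HTTP::Agent first, still yields self when a block is given, and only afterwards calls @agent.set_proxy and @agent.set_http, so anything set in a constructor block reaches the agent before the first request. A hedged sketch (the alias and values are illustrative):

    agent = Mechanize.new do |a|
      a.user_agent_alias    = 'Windows IE 6'  # any key of AGENT_ALIASES
      a.follow_meta_refresh = true
      a.max_history         = 10              # caps @agent.history.max_size
    end

Also note that user_agent_alias= now includes the offending value in its ArgumentError message instead of the bare "unknown agent alias".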

   # Returns a list of cookies stored in the cookie jar.
   def cookies
-    @cookie_jar.to_a
+    @agent.cookie_jar.to_a
   end

   # Sets the user and password to be used for authentication.
   def auth(user, password)
-    @user = user
-    @password = password
+    @agent.user = user
+    @agent.password = password
   end
+  alias :basic_auth :auth

   # Fetches the URL passed in and returns a page.
   def get(uri, parameters = [], referer = nil, headers = {})
     method = :get
@@ -300,17 +366,16 @@
       referer = options[:referer]
       headers = options[:headers]
       method = options[:verb] || method
     end

-    unless referer
+    referer ||=
       if uri.to_s =~ %r{\Ahttps?://}
-        referer = Page.new(nil, {'content-type'=>'text/html'})
+        Page.new(nil, {'content-type'=>'text/html'})
       else
-        referer = current_page || Page.new(nil, {'content-type'=>'text/html'})
+        current_page || Page.new(nil, {'content-type'=>'text/html'})
       end
-    end

     # FIXME: Huge hack so that using a URI as a referer works. I need to
     # refactor everything to pass around URIs but still support
     # Mechanize::Page#base
     unless referer.is_a?(Mechanize::File)
@@ -319,11 +384,11 @@
       Page.new(referer, {'content-type' => 'text/html'})
     end

     # fetch the page
     headers ||= {}
-    page = fetch_page uri, method, headers, parameters, referer
+    page = @agent.fetch uri, method, headers, parameters, referer
     add_to_history(page)
     yield page if block_given?
     page
   end
@@ -340,11 +405,11 @@
   # DELETE to +url+ with +query_params+, and setting +headers+:
   #
   #   delete('http://example/', {'q' => 'foo'}, {})
   #
   def delete(uri, query_params = {}, headers = {})
-    page = fetch_page(uri, :delete, headers, query_params)
+    page = @agent.fetch(uri, :delete, headers, query_params)
     add_to_history(page)
     page
   end

   ##
@@ -352,11 +417,11 @@
   #
   #   head('http://example/', {'q' => 'foo'}, {})
   #
   def head(uri, query_params = {}, headers = {})
     # fetch the page
-    page = fetch_page(uri, :head, headers, query_params)
+    page = @agent.fetch(uri, :head, headers, query_params)
     yield page if block_given?
     page
   end

   # Fetch a file and return the contents of the file.
@@ -369,16 +434,22 @@
   # Mechanize::Page::Link object passed in. Returns the page fetched.
   def click(link)
     case link
     when Page::Link
       referer = link.page || current_page()
-      if robots
+      if @agent.robots
         if (referer.is_a?(Page) && referer.parser.nofollow?) ||
            link.rel?('nofollow')
           raise RobotsDisallowedError.new(link.href)
         end
       end
-      get link.href, [], referer
+      if link.rel?('noreferrer')
+        href = @agent.resolve(link.href, link.page || current_page)
+        referer = Page.new(nil, {'content-type'=>'text/html'})
+      else
+        href = link.href
+      end
+      get href, [], referer
     when String, Regexp
       if real_link = page.link_with(:text => link)
         click real_link
       else
         button = nil
@@ -397,11 +468,11 @@
     end
   end

   # Equivalent to the browser back button. Returns the most recent page
   # visited.
   def back
-    @history.pop
+    @agent.history.pop
   end

   # Posts to the given URL with the request entity. The request
   # entity is specified by either a string, or a list of key-value
   # pairs represented by a hash or an array of arrays.
@@ -466,515 +537,72 @@
     headers = {
       'Content-Type' => 'application/octet-stream',
       'Content-Length' => entity.size.to_s,
     }.update headers

-    page = fetch_page uri, verb, headers, [entity], cur_page
+    page = @agent.fetch uri, verb, headers, [entity], cur_page
     add_to_history(page)
     page
   end

   # Returns the current page loaded by Mechanize
   def current_page
-    @history.last
+    @agent.current_page
   end
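History bookkeeping follows the same delegation pattern: get, delete and the entity verbs push the fetched page through add_to_history, while head (above) does not touch history at all, before or after this refactoring. Illustrative usage, assuming a reachable URL:

    agent = Mechanize.new

    page = agent.get 'http://example.com/'  # fetched via @agent.fetch, pushed onto history
    agent.head 'http://example.com/'        # no add_to_history call; history is unchanged
    agent.current_page                      # => page, now via @agent.current_page
    agent.back                              # pops @agent.history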

-  # Returns whether or not a url has been visited
-  def visited?(url)
-    ! visited_page(url).nil?
-  end
-
   # Returns a visited page for the url passed in, otherwise nil
   def visited_page(url)
-    if url.respond_to? :href
-      url = url.href
-    end
-    @history.visited_page(resolve(url))
-  end
+    url = url.href if url.respond_to? :href

-  # Runs given block, then resets the page history as it was before. self is
-  # given as a parameter to the block. Returns the value of the block.
-  def transact
-    history_backup = @history.dup
-    begin
-      yield self
-    ensure
-      @history = history_backup
-    end
+    @agent.visited_page url
   end

-  # Tests if this agent is allowed to access +url+, consulting the
-  # site's robots.txt.
-  def robots_allowed?(uri)
-    return true if uri.request_uri == '/robots.txt'
+  # Returns whether or not a url has been visited
+  alias visited? visited_page

-    webrobots.allowed?(uri)
-  end
-
-  # Equivalent to !robots_allowed?(url).
-  def robots_disallowed?(url)
-    !webrobots.allowed?(url)
-  end
-
-  # Returns an error object if there is an error in fetching or
-  # parsing robots.txt of the site +url+.
-  def robots_error(url)
-    webrobots.error(url)
-  end
-
-  # Raises the error if there is an error in fetching or parsing
-  # robots.txt of the site +url+.
-  def robots_error!(url)
-    webrobots.error!(url)
-  end
-
-  # Removes robots.txt cache for the site +url+.
-  def robots_reset(url)
-    webrobots.reset(url)
-  end
-
-  alias :page :current_page
-
-  def connection_for uri
-    case uri.scheme.downcase
-    when 'http', 'https' then
-      return @http
-    when 'file' then
-      return Mechanize::FileConnection.new
-    end
-  end
-
-  def enable_gzip request
-    request['accept-encoding'] = if @gzip_enabled
-                                   'gzip,deflate,identity'
-                                 else
-                                   'identity'
-                                 end
-  end
-
-  def http_request uri, method, params = nil
-    case uri.scheme.downcase
-    when 'http', 'https' then
-      klass = Net::HTTP.const_get(method.to_s.capitalize)
-
-      request ||= klass.new(uri.request_uri)
-      request.body = params.first if params
-
-      request
-    when 'file' then
-      Mechanize::FileRequest.new uri
-    end
-  end
-
-  ##
-  # Invokes hooks added to post_connect_hooks after a +response+ is returned.
-  # Yields the +agent+ and the +response+ returned to each hook.
-
-  def post_connect response # :yields: agent, response
-    @post_connect_hooks.each do |hook|
-      hook.call self, response
-    end
-  end
-
-  ##
-  # Invokes hooks added to pre_connect_hooks before a +request+ is made.
-  # Yields the +agent+ and the +request+ that will be performed to each hook.
-
-  def pre_connect request # :yields: agent, request
-    @pre_connect_hooks.each do |hook|
-      hook.call self, request
-    end
-  end
-
-  def request_auth request, uri
-    auth_type = @auth_hash[uri.host]
-
-    return unless auth_type
-
-    case auth_type
-    when :basic
-      request.basic_auth @user, @password
-    when :digest, :iis_digest
-      uri.user = @user
-      uri.password = @password
-
-      iis = auth_type == :iis_digest
-
-      auth = @digest_auth.auth_header uri, @digest, request.method, iis
-
-      request['Authorization'] = auth
-    end
-  end
-
-  def request_cookies request, uri
-    return if @cookie_jar.empty? uri
-
-    cookies = @cookie_jar.cookies uri
-
-    return if cookies.empty?
-
-    request.add_field 'Cookie', cookies.join('; ')
-  end
-
-  def request_host request, uri
-    port = [80, 443].include?(uri.port.to_i) ? nil : uri.port
-    host = uri.host
-
-    request['Host'] = [host, port].compact.join ':'
-  end
-
-  def request_language_charset request
-    request['accept-charset'] = 'ISO-8859-1,utf-8;q=0.7,*;q=0.7'
-    request['accept-language'] = 'en-us,en;q=0.5'
-  end
-
-  # Log specified headers for the request
-  def request_log request
-    return unless log
-
-    log.info("#{request.class}: #{request.path}")
-
-    request.each_header do |k, v|
-      log.debug("request-header: #{k} => #{v}")
-    end
-  end
-
-  def request_add_headers request, headers = {}
-    @request_headers.each do |k,v|
-      request[k] = v
-    end
-
-    headers.each do |field, value|
-      case field
-      when :etag then request["ETag"] = value
-      when :if_modified_since then request["If-Modified-Since"] = value
-      when Symbol then
-        raise ArgumentError, "unknown header symbol #{field}"
-      else
-        request[field] = value
-      end
-    end
-  end
-
-  def request_referer request, uri, referer
-    return unless referer
-    return if 'https' == referer.scheme.downcase and
-              'https' != uri.scheme.downcase
-
-    request['Referer'] = referer
-  end
-
-  def request_user_agent request
-    request['User-Agent'] = @user_agent if @user_agent
-  end
-
-  def resolve(uri, referer = current_page())
-    uri = uri.dup if uri.is_a?(URI)
-
-    unless uri.is_a?(URI)
-      uri = uri.to_s.strip.gsub(/[^#{0.chr}-#{126.chr}]/o) { |match|
-        if RUBY_VERSION >= "1.9.0"
-          Mechanize::Util.uri_escape(match)
-        else
-          sprintf('%%%X', match.unpack($KCODE == 'UTF8' ? 'U' : 'C')[0])
-        end
-      }
-
-      unescaped = uri.split(/(?:%[0-9A-Fa-f]{2})+|#/)
-      escaped = uri.scan(/(?:%[0-9A-Fa-f]{2})+|#/)
-
-      escaped_uri = Mechanize::Util.html_unescape(
-        unescaped.zip(escaped).map { |x,y|
-          "#{WEBrick::HTTPUtils.escape(x)}#{y}"
-        }.join('')
-      )
-
-      begin
-        uri = URI.parse(escaped_uri)
-      rescue
-        uri = URI.parse(WEBrick::HTTPUtils.escape(escaped_uri))
-      end
-    end
-
-    scheme = uri.relative? ? 'relative' : uri.scheme.downcase
-    uri = @scheme_handlers[scheme].call(uri, referer)
-
-    if referer && referer.uri
-      if uri.path.length == 0 && uri.relative?
-        uri.path = referer.uri.path
-      end
-    end
-
-    uri.path = '/' if uri.path.length == 0
-
-    if uri.relative?
-      raise ArgumentError, "absolute URL needed (not #{uri})" unless
-        referer && referer.uri
-
-      base = nil
-      if referer.respond_to?(:bases) && referer.parser
-        base = referer.bases.last
-      end
-
-      uri = ((base && base.uri && base.uri.absolute?) ?
-             base.uri :
-             referer.uri) + uri
-      uri = referer.uri + uri
-      # Strip initial "/.." bits from the path
-      uri.path.sub!(/^(\/\.\.)+(?=\/)/, '')
-    end
-
-    unless ['http', 'https', 'file'].include?(uri.scheme.downcase)
-      raise ArgumentError, "unsupported scheme: #{uri.scheme}"
-    end
-
-    uri
-  end
-
-  def resolve_parameters uri, method, parameters
-    case method
-    when :head, :get, :delete, :trace then
-      if parameters and parameters.length > 0
-        uri.query ||= ''
-        uri.query << '&' if uri.query.length > 0
-        uri.query << Mechanize::Util.build_query_string(parameters)
-      end
-
-      return uri, nil
-    end
-
-    return uri, parameters
-  end
-
-  def response_cookies response, uri, page
-    if Mechanize::Page === page and page.body =~ /Set-Cookie/n
-      page.search('//head/meta[@http-equiv="Set-Cookie"]').each do |meta|
-        Mechanize::Cookie.parse(uri, meta['content']) { |c|
-          log.debug("saved cookie: #{c}") if log
-          @cookie_jar.add(uri, c)
-        }
-      end
-    end
-
-    header_cookies = response.get_fields 'Set-Cookie'
-
-    return unless header_cookies
-
-    header_cookies.each do |cookie|
-      Mechanize::Cookie.parse(uri, cookie) { |c|
-        log.debug("saved cookie: #{c}") if log
-        @cookie_jar.add(uri, c)
-      }
-    end
-  end
-
-  def response_follow_meta_refresh response, uri, page, redirects
-    return unless @follow_meta_refresh
-
-    redirect_uri = nil
-    referer = page
-
-    if page.respond_to?(:meta) and (redirect = page.meta.first)
-      redirect_uri = Mechanize::Util.uri_unescape redirect.uri.to_s
-      sleep redirect.node['delay'].to_f
-      referer = Page.new(nil, {'content-type'=>'text/html'})
-    elsif refresh = response['refresh']
-      delay, redirect_uri = Page::Meta.parse(refresh, uri)
-      raise Mechanize::Error, 'Invalid refresh http header' unless delay
-      raise RedirectLimitReachedError.new(page, redirects) if
-        redirects + 1 > redirection_limit
-      sleep delay.to_f
-    end
-
-    if redirect_uri
-      @history.push(page, page.uri)
-      fetch_page(redirect_uri, :get, {}, [], referer, redirects + 1)
-    end
-  end
-
-  def response_log response
-    return unless log
-
-    log.info("status: #{response.class} #{response.http_version} " \
-             "#{response.code} #{response.message}")
-
-    response.each_header do |k, v|
-      log.debug("response-header: #{k} => #{v}")
-    end
-  end
-
-  def response_parse response, body, uri
+  def parse uri, response, body
     content_type = nil

     unless response['Content-Type'].nil?
       data, = response['Content-Type'].split ';', 2
       content_type, = data.downcase.split ',', 2 unless data.nil?
     end

     # Find our pluggable parser
-    parser_klass = @pluggable_parser.parser(content_type)
+    parser_klass = @pluggable_parser.parser content_type

-    parser_klass.new(uri, response, body, response.code) { |parser|
+    parser_klass.new uri, response, body, response.code do |parser|
       parser.mech = self if parser.respond_to? :mech=

-      if @watch_for_set and parser.respond_to?(:watch_for_set=)
-        parser.watch_for_set = @watch_for_set
-      end
-    }
-  end
-
-  def response_read response, request
-    body = StringIO.new
-    body.set_encoding Encoding::BINARY if body.respond_to? :set_encoding
-    total = 0
-
-    response.read_body { |part|
-      total += part.length
-      body.write(part)
-      log.debug("Read #{part.length} bytes (#{total} total)") if log
-    }
-
-    body.rewind
-
-    raise Mechanize::ResponseCodeError, response if
-      Net::HTTPUnknownResponse === response
-
-    content_length = response.content_length
-
-    unless Net::HTTP::Head === request or Net::HTTPRedirection === response then
-      raise EOFError, "Content-Length (#{content_length}) does not match " \
-                      "response body length (#{body.length})" if
-        content_length and content_length != body.length
+      parser.watch_for_set = @watch_for_set if
+        @watch_for_set and parser.respond_to?(:watch_for_set=)
     end
-
-    case response['Content-Encoding']
-    when nil, 'none', '7bit' then
-      body.string
-    when 'deflate' then
-      log.debug('deflate body') if log
-
-      if content_length > 0 or body.length > 0 then
-        begin
-          Zlib::Inflate.inflate body.string
-        rescue Zlib::BufError, Zlib::DataError
-          log.error('Unable to inflate page, retrying with raw deflate') if log
-          begin
-            Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.string)
-          rescue Zlib::BufError, Zlib::DataError
-            log.error("unable to inflate page: #{$!}") if log
-            ''
-          end
-        end
-      end
-    when 'gzip', 'x-gzip' then
-      log.debug('gzip body') if log
-
-      if content_length > 0 or body.length > 0 then
-        begin
-          zio = Zlib::GzipReader.new body
-          zio.read
-        rescue Zlib::BufError, Zlib::GzipFile::Error
-          log.error('Unable to gunzip body, trying raw inflate') if log
-          body.rewind
-          body.read 10
-          Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(body.read)
-        rescue Zlib::DataError
-          log.error("unable to gunzip page: #{$!}") if log
-          ''
-        ensure
-          zio.close if zio and not zio.closed?
-        end
-      end
-    else
-      raise Mechanize::Error,
-            "Unsupported Content-Encoding: #{response['Content-Encoding']}"
-    end
   end

-  def response_redirect response, method, page, redirects
-    case @redirect_ok
-    when true, :all
-      # shortcut
-    when false, nil
-      return page
-    when :permanent
-      return page if response_class != Net::HTTPMovedPermanently
+  # Runs given block, then resets the page history as it was before. self is
+  # given as a parameter to the block. Returns the value of the block.
+  def transact
+    history_backup = @agent.history.dup
+    begin
+      yield self
+    ensure
+      @agent.history = history_backup
    end
-
-    log.info("follow redirect to: #{response['Location']}") if log
-
-    from_uri = page.uri
-
-    raise RedirectLimitReachedError.new(page, redirects) if
-      redirects + 1 > redirection_limit
-
-    redirect_method = method == :head ? :head : :get
-
-    page = fetch_page(response['Location'].to_s, redirect_method, {}, [],
-                      page, redirects + 1)
-
-    @history.push(page, from_uri)
-
-    return page
   end
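transact keeps its public contract but now snapshots and restores @agent.history rather than a local @history. A small sketch of the guarantee (the URLs are illustrative):

    agent = Mechanize.new
    agent.get 'http://example.com/'

    agent.transact do |a|
      a.get 'http://example.com/other'  # history grows inside the block
    end
    # the ensure clause has restored the pre-block history here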

-  def response_authenticate(response, page, uri, request, headers, params,
-                            referer)
-    raise ResponseCodeError, page unless @user || @password
-    raise ResponseCodeError, page if @auth_hash.has_key?(uri.host)
-
-    if response['www-authenticate'] =~ /Digest/i
-      @auth_hash[uri.host] = :digest
-      if response['server'] =~ /Microsoft-IIS/
-        @auth_hash[uri.host] = :iis_digest
-      end
-      @digest = response['www-authenticate']
-    else
-      @auth_hash[uri.host] = :basic
-    end
-
-    fetch_page(uri, request.method.downcase.to_sym, headers, params, referer)
+  def robots
+    @agent.robots
   end

-  private
-
-  def webrobots_http_get(uri)
-    get_file(uri)
-  rescue Mechanize::ResponseCodeError => e
-    return '' if e.response_code == '404'
-    raise e
+  def robots= enabled
+    @agent.robots = enabled
   end

-  def webrobots
-    @webrobots ||= WebRobots.new(@user_agent, :http_get => method(:webrobots_http_get))
-  end
+  alias :page :current_page

-  def set_http proxy = nil
-    @http = Net::HTTP::Persistent.new 'mechanize', proxy
+  private

-    @http.keep_alive = @keep_alive_time
-
-    @http.ca_file = @ca_file
-    @http.verify_callback = @verify_callback
-
-    if @cert and @key then
-      cert = if OpenSSL::X509::Certificate === @cert then
-               @cert
-             else
-               OpenSSL::X509::Certificate.new ::File.read @cert
-             end
-
-      key = if OpenSSL::PKey::PKey === @key then
-              @key
-            else
-              OpenSSL::PKey::RSA.new ::File.read(@key), @pass
-            end
-
-      @http.certificate = cert
-      @http.private_key = key
-    end
-  end
-
   def post_form(uri, form, headers = {})
     cur_page = form.page || current_page ||
       Page.new(nil, {'content-type'=>'text/html'})

     request_data = form.request_data
@@ -985,103 +613,20 @@
       'Content-Type' => form.enctype,
       'Content-Length' => request_data.size.to_s,
     }.merge headers

     # fetch the page
-    page = fetch_page uri, :post, headers, [request_data], cur_page
+    page = @agent.fetch uri, :post, headers, [request_data], cur_page
     add_to_history(page)
     page
   end

-  # uri is an absolute URI
-  def fetch_page uri, method = :get, headers = {}, params = [],
-                 referer = current_page, redirects = 0
-    referer_uri = referer ? referer.uri : nil
-
-    uri = resolve uri, referer
-
-    uri, params = resolve_parameters uri, method, params
-
-    request = http_request uri, method, params
-
-    connection = connection_for uri
-
-    request_auth request, uri
-
-    enable_gzip request
-
-    request_language_charset request
-    request_cookies request, uri
-    request_host request, uri
-    request_referer request, uri, referer_uri
-    request_user_agent request
-    request_add_headers request, headers
-
-    pre_connect request
-
-    # Consult robots.txt
-    if robots && uri.is_a?(URI::HTTP)
-      robots_allowed?(uri) or raise RobotsDisallowedError.new(uri)
-    end
-
-    # Add If-Modified-Since if page is in history
-    if (page = visited_page(uri)) and page.response['Last-Modified']
-      request['If-Modified-Since'] = page.response['Last-Modified']
-    end if(@conditional_requests)
-
-    # Specify timeouts if given
-    connection.open_timeout = @open_timeout if @open_timeout
-    connection.read_timeout = @read_timeout if @read_timeout
-
-    request_log request
-
-    response_body = nil
-
-    # Send the request
-    response = connection.request(uri, request) { |res|
-      response_log res
-
-      response_body = response_read res, request
-
-      res
-    }
-
-    post_connect response
-
-    page = response_parse response, response_body, uri
-
-    response_cookies response, uri, page
-
-    meta = response_follow_meta_refresh response, uri, page, redirects
-    return meta if meta
-
-    case response
-    when Net::HTTPSuccess
-      if robots && page.is_a?(Page)
-        page.parser.noindex? and raise RobotsDisallowedError.new(uri)
-      end
-
-      page
-    when Mechanize::FileResponse
-      page
-    when Net::HTTPNotModified
-      log.debug("Got cached page") if log
-      visited_page(uri) || page
-    when Net::HTTPRedirection
-      response_redirect response, method, page, redirects
-    when Net::HTTPUnauthorized
-      response_authenticate(response, page, uri, request, headers, params,
-                            referer)
-    else
-      raise ResponseCodeError.new(page), "Unhandled response"
-    end
-  end
-
   def add_to_history(page)
-    @history.push(page, resolve(page.uri))
-    history_added.call(page) if history_added
+    @agent.history.push(page, @agent.resolve(page.uri))
+    @history_added.call(page) if @history_added
   end
+
 end

 require 'mechanize/content_type_error'
 require 'mechanize/cookie'
 require 'mechanize/cookie_jar'
@@ -1089,16 +634,19 @@
 require 'mechanize/file_connection'
 require 'mechanize/file_request'
 require 'mechanize/file_response'
 require 'mechanize/form'
 require 'mechanize/history'
+require 'mechanize/http'
+require 'mechanize/http/agent'
 require 'mechanize/page'
 require 'mechanize/inspect'
 require 'mechanize/monkey_patch'
 require 'mechanize/pluggable_parsers'
 require 'mechanize/redirect_limit_reached_error'
 require 'mechanize/redirect_not_get_or_head_error'
 require 'mechanize/response_code_error'
+require 'mechanize/response_read_error'
 require 'mechanize/robots_disallowed_error'
 require 'mechanize/unsupported_scheme_error'
 require 'mechanize/util'
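Nothing removed in the later hunks is gone for good: fetch_page, resolve, the request_*/response_* helpers, set_http and the WebRobots plumbing all moved into Mechanize::HTTP::Agent, which is why mechanize/http and mechanize/http/agent join the require list. Code that reached into the old private helpers must now go through the agent; a hedged sketch using only calls visible in this diff:

    m = Mechanize.new

    # the low-level fetch that get/delete/head/post now delegate to
    page = m.agent.fetch 'http://example.com/', :get, {}, []

    # robots support survives as a reader/writer pair wrapping the agent
    m.robots = true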