require 'fileutils'
require 'forwardable'
require 'iconv' if RUBY_VERSION < '1.9.2'
require 'mime/types'
require 'mutex_m'
require 'net/http/digest_auth'
require 'net/http/persistent'
require 'nokogiri'
require 'openssl'
require 'pp'
require 'stringio'
require 'uri'
require 'webrick/httputils'
require 'zlib'
##
# The Mechanize library is used for automating interaction with websites. It
# can follow links and submit forms. Form fields can be populated and
# submitted. A history of URLs is maintained and can be queried.
#
# == Example
#
# require 'mechanize'
# require 'logger'
#
# agent = Mechanize.new
# agent.log = Logger.new "mech.log"
# agent.user_agent_alias = 'Mac Safari'
#
# page = agent.get "http://www.google.com/"
# search_form = page.form_with :name => "f"
# search_form.field_with(:name => "q").value = "Hello"
#
# search_results = agent.submit search_form
# puts search_results.body
#
# == Issues with mechanize
#
# If you think you have a bug with mechanize, but aren't sure, please file a
# ticket at https://github.com/tenderlove/mechanize/issues
#
# Here are some common problems you may experience with mechanize
#
# === Problems connecting to SSL sites
#
# Mechanize defaults to validating SSL certificates using the default CA
# certificates for your platform. At this time, Windows users do not have
# integration between the OS default CA certificates and OpenSSL. #cert_store
# explains how to download and use Mozilla's CA certificates to allow SSL
# sites to work.
#
# === Problems with content-length
#
# Some sites return an incorrect content-length value. Unlike a browser,
# mechanize raises an error when the content-length header does not match the
# response length since it does not know if there was a connection problem or
# if the mismatch is a server bug.
#
# The error raised, Mechanize::ResponseReadError, can be converted to a parsed
# Page, File, etc. depending upon the content-type:
#
# agent = Mechanize.new
# uri = URI 'http://example/invalid_content_length'
#
# begin
# page = agent.get uri
# rescue Mechanize::ResponseReadError => e
# page = e.force_parse
# end
class Mechanize
##
# The version of Mechanize you are using.
VERSION = '2.2'
##
# Base mechanize error class
class Error < RuntimeError
end
ruby_version = if RUBY_PATCHLEVEL >= 0 then
"#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}"
else
"#{RUBY_VERSION}dev#{RUBY_REVISION}"
end
##
# Supported User-Agent aliases for use with user_agent_alias=. The
# description in parentheses is for informative purposes and is not part of
# the alias name.
#
# * Linux Firefox (3.6.1)
# * Linux Konqueror (3)
# * Linux Mozilla
# * Mac FireFox (3.6)
# * Mac Mozilla
# * Mac Safari (5)
# * Mac Safari 4
# * Mechanize (default)
# * Windows IE 6
# * Windows IE 7
# * Windows IE 8
# * Windows IE 9
# * Windows Mozilla
# * iPhone (3.0)
#
# Example:
#
# agent = Mechanize.new
# agent.user_agent_alias = 'Mac Safari'
AGENT_ALIASES = {
'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/tenderlove/mechanize/)",
'Linux Firefox' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.1) Gecko/20100122 firefox/3.6.1',
'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)',
'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624',
'Mac FireFox' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2) Gecko/20100115 Firefox/3.6',
'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401',
'Mac Safari 4' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10',
'Mac Safari' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/534.51.22 (KHTML, like Gecko) Version/5.1.1 Safari/534.51.22',
'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
'Windows IE 8' => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)',
'Windows IE 9' => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)',
'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6',
'iPhone' => 'Mozilla/5.0 (iPhone; U; CPU like Mac OS X; en) AppleWebKit/420+ (KHTML, like Gecko) Version/3.0 Mobile/1C28 Safari/419.3',
}
def self.inherited(child) # :nodoc:
child.html_parser ||= html_parser
child.log ||= log
super
end
##
# Creates a new mechanize instance. If a block is given, the created
# instance is yielded to the block for setting up pre-connection state such
# as SSL parameters or proxies:
#
# agent = Mechanize.new do |a|
# a.proxy_host = 'proxy.example'
# a.proxy_port = 8080
# end
def initialize
@agent = Mechanize::HTTP::Agent.new
@agent.context = self
@log = nil
# attr_accessors
@agent.user_agent = AGENT_ALIASES['Mechanize']
@watch_for_set = nil
@history_added = nil
# attr_readers
@pluggable_parser = PluggableParser.new
@keep_alive_time = 0
# Proxy
@proxy_addr = nil
@proxy_port = nil
@proxy_user = nil
@proxy_pass = nil
@html_parser = self.class.html_parser
@default_encoding = nil
@force_default_encoding = false
# defaults
@agent.max_history = 50
yield self if block_given?
@agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
end
# :section: History
#
# Methods for navigating and controlling history
##
# Equivalent to the browser back button. Returns the previous page visited.
def back
@agent.history.pop
end
##
# Returns the latest page loaded by Mechanize
def current_page
@agent.current_page
end
alias page current_page
##
# The history of this mechanize run
def history
@agent.history
end
##
# Maximum number of items allowed in the history.
def max_history
@agent.history.max_size
end
##
# Sets the maximum number of items allowed in the history to +length+.
#
# Setting the maximum history length to nil will make the history size
# unlimited. Take care when doing this: mechanize stores page bodies in the
# temporary files directory for pages in the history. For a long-running
# mechanize program this can be quite large.
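#
# For example:
#
#   agent.max_history = 10   # keep at most 10 pages
#   agent.max_history = nil  # unlimited history; page bodies accumulate on disk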
def max_history= length
@agent.history.max_size = length
end
##
# Returns a visited page for the +url+ passed in, otherwise nil
def visited? url
url = url.href if url.respond_to? :href
@agent.visited_page url
end
##
# Returns whether or not a url has been visited
alias visited_page visited?
# :section: Hooks
#
# Hooks into the operation of mechanize
##
# A list of hooks to call before reading response header 'content-encoding'.
#
# The hook is called with the agent making the request, the URI of the
# request, the response, and an IO containing the response body.
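#
# A sketch of a hook that clears an unrecognized encoding value before
# mechanize processes it (the 'none' check is illustrative; the arguments
# are as described above):
#
#   agent.content_encoding_hooks << lambda { |a, uri, response, body_io|
#     response['content-encoding'] = '' if response['content-encoding'] == 'none'
#   }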
def content_encoding_hooks
@agent.content_encoding_hooks
end
##
# Callback which is invoked with the page that was added to history.
attr_accessor :history_added
##
# A list of hooks to call after retrieving a response. Hooks are called with
# the agent and the response returned.
def post_connect_hooks
@agent.post_connect_hooks
end
##
# A list of hooks to call before making a request. Hooks are called with
# the agent and the request to be performed.
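#
# A sketch of a hook that adds a header to every request (assuming the hook
# receives the agent and the request, as described above):
#
#   agent.pre_connect_hooks << lambda { |a, request|
#     request['Accept-Language'] = 'en-US'
#   }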
def pre_connect_hooks
@agent.pre_connect_hooks
end
# :section: Requests
#
# Methods for making HTTP requests
##
# If +link+ is a String or Regexp, finds the link whose text matches it, or
# the submit button whose value matches it, on the current page and clicks
# it. Otherwise, clicks the Mechanize::Page::Link object passed in. Returns
# the fetched page.
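#
# Example (the link text is illustrative):
#
#   agent.get 'http://example.com/'
#   page = agent.click 'Sign in'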
def click link
case link
when Page::Link then
referer = link.page || current_page()
if @agent.robots
if (referer.is_a?(Page) and referer.parser.nofollow?) or
link.rel?('nofollow') then
raise RobotsDisallowedError.new(link.href)
end
end
if link.noreferrer?
href = @agent.resolve(link.href, link.page || current_page)
referer = Page.new(nil, {'content-type'=>'text/html'})
else
href = link.href
end
get href, [], referer
when String, Regexp then
if real_link = page.link_with(:text => link)
click real_link
else
button = nil
form = page.forms.find do |f|
button = f.button_with(:value => link)
button.is_a? Form::Submit
end
submit form, button if form
end
else
referer = current_page()
href = link.respond_to?(:href) ? link.href :
(link['href'] || link['src'])
get href, [], referer
end
end
##
# GETs +uri+ and writes it to +io_or_filename+ without recording the request
# in the history. If +io_or_filename+ does not respond to #write it will be
# used as a file name. +parameters+, +referer+ and +headers+ are used as in
# #get.
#
# By default, if the Content-type of the response matches a Mechanize::File
# or Mechanize::Page parser, the response body will be loaded into memory
# before being saved. See #pluggable_parser for details on changing this
# default.
#
# For alternate ways of downloading files see Mechanize::FileSaver and
# Mechanize::DirectorySaver.
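#
# Example (the URL and file name are illustrative):
#
#   agent.download 'http://example.com/report.pdf', 'report.pdf'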
def download uri, io_or_filename, parameters = [], referer = nil, headers = {}
page = transact do
get uri, parameters, referer, headers
end
io = if io_or_filename.respond_to? :write then
io_or_filename
else
open io_or_filename, 'wb'
end
case page
when Mechanize::File then
io.write page.body
else
body_io = page.body_io
until body_io.eof? do
io.write body_io.read 16384
end
end
page
ensure
io.close if io and not io_or_filename.respond_to? :write
end
##
# DELETE +uri+ with the given +query_params+ and +headers+:
#
# delete('http://example/', {'q' => 'foo'}, {})
def delete(uri, query_params = {}, headers = {})
page = @agent.fetch(uri, :delete, headers, query_params)
add_to_history(page)
page
end
##
# GET the +uri+ with the given request +parameters+, +referer+ and
# +headers+.
#
# The +referer+ may be a URI or a page.
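#
# Example (the URL and parameters are illustrative):
#
#   page = agent.get 'http://example.com/search', [['q', 'ruby']]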
def get(uri, parameters = [], referer = nil, headers = {})
method = :get
referer ||=
if uri.to_s =~ %r{\Ahttps?://}
Page.new(nil, 'content-type' => 'text/html')
else
current_page || Page.new(nil, 'content-type' => 'text/html')
end
# FIXME: Huge hack so that using a URI as a referer works. I need to
# refactor everything to pass around URIs but still support
# Mechanize::Page#base
unless Mechanize::Parser === referer then
referer = if referer.is_a?(String) then
Page.new URI(referer), 'content-type' => 'text/html'
else
Page.new referer, 'content-type' => 'text/html'
end
end
# fetch the page
headers ||= {}
page = @agent.fetch uri, method, headers, parameters, referer
add_to_history(page)
yield page if block_given?
page
end
##
# GET +url+ and return only its contents
def get_file(url)
get(url).body
end
##
# HEAD +uri+ with +query_params+ and +headers+:
#
# head('http://example/', {'q' => 'foo'}, {})
def head(uri, query_params = {}, headers = {})
page = @agent.fetch uri, :head, headers, query_params
yield page if block_given?
page
end
##
# POST to the given +uri+ with the given +query+. The query is specified by
# either a string, or a list of key-value pairs represented by a hash or an
# array of arrays.
#
# Examples:
# agent.post 'http://example.com/', "foo" => "bar"
#
# agent.post 'http://example.com/', [%w[foo bar]]
#
# agent.post('http://example.com/', "hello",
# 'Content-Type' => 'application/xml')
def post(uri, query={}, headers={})
return request_with_entity(:post, uri, query, headers) if String === query
node = {}
# Create a fake form
class << node
def search(*args); []; end
end
node['method'] = 'POST'
node['enctype'] = 'application/x-www-form-urlencoded'
form = Form.new(node)
query.each { |k, v|
if v.is_a?(IO)
form.enctype = 'multipart/form-data'
ul = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path))
ul.file_data = v.read
form.file_uploads << ul
else
form.fields << Form::Field.new({'name' => k.to_s},v)
end
}
post_form(uri, form, headers)
end
##
# PUT +entity+ to +uri+ with the given +headers+:
#
# put('http://example/', 'new content', {'Content-Type' => 'text/plain'})
def put(uri, entity, headers = {})
request_with_entity(:put, uri, entity, headers)
end
##
# Makes an HTTP request to +uri+ using HTTP method +verb+. +entity+ is used
# as the request body, if allowed.
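#
# A sketch using PUT (the URI and body are illustrative):
#
#   agent.request_with_entity :put, 'http://example/resource', 'new content',
#     'Content-Type' => 'text/plain'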
def request_with_entity(verb, uri, entity, headers = {})
cur_page = current_page || Page.new(nil, {'content-type'=>'text/html'})
headers = {
'Content-Type' => 'application/octet-stream',
'Content-Length' => entity.size.to_s,
}.update headers
page = @agent.fetch uri, verb, headers, [entity], cur_page
add_to_history(page)
page
end
##
# Submits +form+ with an optional +button+.
#
# Without a button:
#
# page = agent.get('http://example.com')
# agent.submit(page.forms.first)
#
# With a button:
#
# agent.submit(page.forms.first, page.forms.first.buttons.first)
def submit(form, button=nil, headers={})
form.add_button_to_query(button) if button
case form.method.upcase
when 'POST'
post_form(form.action, form, headers)
when 'GET'
get(form.action.gsub(/\?[^\?]*$/, ''),
form.build_query,
form.page,
headers)
else
raise ArgumentError, "unsupported method: #{form.method.upcase}"
end
end
##
# Runs the given block, then restores the page history to what it was
# before. self is yielded to the block. Returns the value of the block.
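#
# Example (the URL is illustrative):
#
#   agent.transact do |a|
#     a.get 'http://example.com/interstitial'
#   end
#   # pages fetched inside the block are not left in agent.history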
def transact
history_backup = @agent.history.dup
begin
yield self
ensure
@agent.history = history_backup
end
end
# :section: Settings
#
# Settings that adjust how mechanize makes HTTP requests including timeouts,
# keep-alives, compression, redirects and headers.
@html_parser = Nokogiri::HTML
class << self
##
# Default HTML parser for all mechanize instances
#
# Mechanize.html_parser = Nokogiri::XML
attr_accessor :html_parser
##
# Default logger for all mechanize instances
#
# Mechanize.log = Logger.new $stderr
attr_accessor :log
end
##
# A default encoding name used when parsing HTML. When set it is
# used after any other encoding. The default is nil.
attr_accessor :default_encoding
##
# Overrides the encodings given by the HTTP server and the HTML page with
# the default_encoding when set to true.
attr_accessor :force_default_encoding
##
# The HTML parser to be used when parsing documents
attr_accessor :html_parser
##
# HTTP/1.0 keep-alive time. This is no longer supported by mechanize as it
# now uses net-http-persistent which only supports HTTP/1.1 persistent
# connections
attr_accessor :keep_alive_time
##
# The pluggable parser maps a response Content-Type to a parser class. The
# registered Content-Type may be either a full content type like 'image/png'
# or a media type 'text'. See Mechanize::PluggableParser for further
# details.
#
# Example:
#
# agent.pluggable_parser['application/octet-stream'] = Mechanize::Download
attr_reader :pluggable_parser
##
# The HTTP proxy address
attr_reader :proxy_addr
##
# The HTTP proxy password
attr_reader :proxy_pass
##
# The HTTP proxy port
attr_reader :proxy_port
##
# The HTTP proxy username
attr_reader :proxy_user
##
# Sets the user and password to be used for HTTP authentication, and
# optionally the domain used for NTLM authentication.
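#
# Example (credentials are illustrative):
#
#   agent.auth 'username', 'password'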
def auth(user, password, domain = nil)
@agent.user = user
@agent.password = password
@agent.domain = domain
end
alias basic_auth auth
##
# Are If-Modified-Since conditional requests enabled?
def conditional_requests
@agent.conditional_requests
end
##
# Enables or disables If-Modified-Since conditional requests (enabled by default)
def conditional_requests= enabled
@agent.conditional_requests = enabled
end
##
# A Mechanize::CookieJar which stores cookies
def cookie_jar
@agent.cookie_jar
end
##
# Replaces the cookie jar with +cookie_jar+
def cookie_jar= cookie_jar
@agent.cookie_jar = cookie_jar
end
##
# Returns a list of cookies stored in the cookie jar.
def cookies
@agent.cookie_jar.to_a
end
##
# Follow HTML meta refresh and HTTP Refresh headers. If set to +:anywhere+
# meta refresh tags outside of the head element will be followed.
def follow_meta_refresh
@agent.follow_meta_refresh
end
##
# Controls following of HTML meta refresh and HTTP Refresh headers in
# responses.
def follow_meta_refresh= follow
@agent.follow_meta_refresh = follow
end
##
# Follow HTML meta refresh and HTTP Refresh headers that have no "url="
# in the content attribute.
#
# Defaults to false to prevent infinite refresh loops.
def follow_meta_refresh_self
@agent.follow_meta_refresh_self
end
##
# Alters the following of HTML meta refresh and HTTP Refresh headers that
# point to the same page.
def follow_meta_refresh_self= follow
@agent.follow_meta_refresh_self = follow
end
##
# Is gzip compression of responses enabled?
def gzip_enabled
@agent.gzip_enabled
end
##
# Enables or disables HTTP/1.1 gzip compression of responses (enabled by
# default)
def gzip_enabled= enabled
@agent.gzip_enabled = enabled
end
##
# Connections that have not been used in this many seconds will be reset.
def idle_timeout
@agent.idle_timeout
end
##
# Sets the idle timeout to +idle_timeout+. The default timeout is 5
# seconds. If you experience "too many connection resets", reducing this
# value may help.
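#
# For example:
#
#   agent.idle_timeout = 1  # reset connections idle for more than one second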
def idle_timeout= idle_timeout
@agent.idle_timeout = idle_timeout
end
##
# Are HTTP/1.1 keep-alive connections enabled?
def keep_alive
@agent.keep_alive
end
##
# Disable HTTP/1.1 keep-alive connections if +enable+ is set to false. If
# you are experiencing "too many connection resets" errors setting this to
# false will eliminate them.
#
# You should first investigate reducing idle_timeout.
def keep_alive= enable
@agent.keep_alive = enable
end
##
# The current logger. If no logger has been set Mechanize.log is used.
def log
@log || Mechanize.log
end
##
# Sets the +logger+ used by this instance of mechanize
def log= logger
@log = logger
end
##
# Responses larger than this will be written to a Tempfile instead of stored
# in memory. The default is 10240 bytes
def max_file_buffer
@agent.max_file_buffer
end
##
# Sets the maximum size of a response body that will be stored in memory to
# +bytes+
def max_file_buffer= bytes
@agent.max_file_buffer = bytes
end
##
# Length of time in seconds to wait for a connection to be opened
def open_timeout
@agent.open_timeout
end
##
# Sets the connection open timeout to +open_timeout+
def open_timeout= open_timeout
@agent.open_timeout = open_timeout
end
##
# Length of time to wait for data from the server
def read_timeout
@agent.read_timeout
end
##
# Sets the timeout for each chunk of data read from the server to
# +read_timeout+. A single request may read many chunks of data.
def read_timeout= read_timeout
@agent.read_timeout = read_timeout
end
##
# Controls how mechanize deals with redirects. The following values are
# allowed:
#
# :all, true:: All 3xx redirects are followed (default)
# :permanent:: Only 301 Moved Permanently redirects are followed
# false:: No redirects are followed
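#
# For example:
#
#   agent.redirect_ok = :permanent  # follow only 301 redirects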
def redirect_ok
@agent.redirect_ok
end
alias follow_redirect? redirect_ok
##
# Sets the mechanize redirect handling policy. See redirect_ok for allowed
# values
def redirect_ok= follow
@agent.redirect_ok = follow
end
##
# Maximum number of redirections to follow
def redirection_limit
@agent.redirection_limit
end
##
# Sets the maximum number of redirections to follow to +limit+
def redirection_limit= limit
@agent.redirection_limit = limit
end
##
# A hash of custom request headers that will be sent on every request
def request_headers
@agent.request_headers
end
##
# Replaces the custom request headers that will be sent on every request
# with +request_headers+
def request_headers= request_headers
@agent.request_headers = request_headers
end
##
# Retry POST and other non-idempotent requests. See RFC 2616 9.1.2.
def retry_change_requests
@agent.retry_change_requests
end
##
# When setting +retry_change_requests+ to true you are stating that, for all
# the URLs you access with mechanize, making POST and other non-idempotent
# requests is safe and will not cause data duplication or other harmful
# results.
#
# If you are experiencing "too many connection resets" errors you should
# instead investigate reducing the idle_timeout or disabling keep_alive
# connections.
def retry_change_requests= retry_change_requests
@agent.retry_change_requests = retry_change_requests
end
##
# Will /robots.txt files be obeyed?
def robots
@agent.robots
end
##
# When +enabled+ mechanize will retrieve and obey robots.txt
# files
def robots= enabled
@agent.robots = enabled
end
##
# The handlers for HTTP and other URI protocols.
def scheme_handlers
@agent.scheme_handlers
end
##
# Replaces the URI scheme handler table with +scheme_handlers+
def scheme_handlers= scheme_handlers
@agent.scheme_handlers = scheme_handlers
end
##
# The identification string for the client initiating a web request
def user_agent
@agent.user_agent
end
##
# Sets the User-Agent used by mechanize to +user_agent+. See also
# user_agent_alias
def user_agent= user_agent
@agent.user_agent = user_agent
end
##
# Set the user agent for the Mechanize object based on the given +name+.
#
# See also AGENT_ALIASES
def user_agent_alias= name
self.user_agent = AGENT_ALIASES[name] ||
raise(ArgumentError, "unknown agent alias #{name.inspect}")
end
##
# The value of watch_for_set is passed to pluggable parsers for retrieved
# content
attr_accessor :watch_for_set
# :section: SSL
#
# SSL settings for mechanize. These must be set in the block given to
# Mechanize.new
##
# Path to a CA certificate file in PEM format used to verify SSL server certificates
def ca_file
@agent.ca_file
end
##
# Sets the CA certificate file used to verify SSL server connections
def ca_file= ca_file
@agent.ca_file = ca_file
end
##
# An OpenSSL client certificate or the path to a certificate file.
def cert
@agent.cert
end
##
# Sets the OpenSSL client certificate +cert+ to the given path or
# certificate instance
def cert= cert
@agent.certificate = cert
end
##
# An OpenSSL certificate store for verifying server certificates. This
# defaults to the default certificate store for your system.
#
# If your system does not ship with a default set of certificates you can
# retrieve a copy of the set from Mozilla here:
# http://curl.haxx.se/docs/caextract.html
#
# (Note that this set does not have an HTTPS download option so you may
# wish to use the firefox-db2pem.sh script to extract the certificates
# from a local install to avoid man-in-the-middle attacks.)
#
# After downloading or generating a cacert.pem from the above link you
# can create a certificate store from the pem file like this:
#
# cert_store = OpenSSL::X509::Store.new
# cert_store.add_file 'cacert.pem'
#
# And have mechanize use it with:
#
# agent.cert_store = cert_store
def cert_store
@agent.cert_store
end
##
# Sets the OpenSSL certificate store to +store+.
#
# See also #cert_store
def cert_store= cert_store
@agent.cert_store = cert_store
end
##
# What is this?
#
# Why is it different from #cert?
def certificate # :nodoc:
@agent.certificate
end
##
# An OpenSSL private key or the path to a private key
def key
@agent.key
end
##
# Sets the OpenSSL client +key+ to the given path or key instance. If a
# path is given, the path must contain an RSA key file.
def key= key
@agent.private_key = key
end
##
# OpenSSL client key password
def pass
@agent.pass
end
##
# Sets the client key password to +pass+
def pass= pass
@agent.pass = pass
end
##
# SSL version to use. Ruby 1.9 and newer only.
def ssl_version
@agent.ssl_version
end if RUBY_VERSION > '1.9'
##
# Sets the SSL version to use to +version+ without client/server
# negotiation. Ruby 1.9 and newer only.
def ssl_version= ssl_version
@agent.ssl_version = ssl_version
end if RUBY_VERSION > '1.9'
##
# A callback for additional certificate verification. See
# OpenSSL::SSL::SSLContext#verify_callback
#
# The callback can be used for debugging or to ignore errors by always
# returning +true+. Specifying nil uses the default method that was valid
# when the SSLContext was created
def verify_callback
@agent.verify_callback
end
##
# Sets the OpenSSL certificate verification callback
def verify_callback= verify_callback
@agent.verify_callback = verify_callback
end
##
# The OpenSSL server certificate verification method. The default is
# OpenSSL::SSL::VERIFY_PEER and certificate verification uses the default
# system certificates. See also cert_store
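#
# For example, to disable verification entirely (not recommended):
#
#   agent.verify_mode = OpenSSL::SSL::VERIFY_NONE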
def verify_mode
@agent.verify_mode
end
##
# Sets the OpenSSL server certificate verification method.
def verify_mode= verify_mode
@agent.verify_mode = verify_mode
end
# :section: Utilities
attr_reader :agent # :nodoc:
##
# Parses the +body+ of the +response+ from +uri+ using the pluggable parser
# that matches its content type
def parse uri, response, body
content_type = nil
unless response['Content-Type'].nil?
data, = response['Content-Type'].split ';', 2
content_type, = data.downcase.split ',', 2 unless data.nil?
end
parser_klass = @pluggable_parser.parser content_type
unless parser_klass <= Mechanize::Download then
body = case body
when IO, Tempfile, StringIO then
body.read
else
body
end
end
parser_klass.new uri, response, body, response.code do |parser|
parser.mech = self if parser.respond_to? :mech=
parser.watch_for_set = @watch_for_set if
@watch_for_set and parser.respond_to?(:watch_for_set=)
end
end
def pretty_print(q) # :nodoc:
q.object_group(self) {
q.breakable
q.pp cookie_jar
q.breakable
q.pp current_page
}
end
##
# Sets the proxy +address+ at +port+ with an optional +user+ and +password+
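#
# Example (the proxy host and credentials are illustrative):
#
#   agent.set_proxy 'proxy.example', 8080, 'user', 'password'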
def set_proxy address, port, user = nil, password = nil
@proxy_addr = address
@proxy_port = port
@proxy_user = user
@proxy_pass = password
@agent.set_proxy address, port, user, password
end
private
##
# Posts +form+ to +uri+
def post_form(uri, form, headers = {})
cur_page = form.page || current_page ||
Page.new(nil, {'content-type'=>'text/html'})
request_data = form.request_data
log.debug("query: #{ request_data.inspect }") if log
headers = {
'Content-Type' => form.enctype,
'Content-Length' => request_data.size.to_s,
}.merge headers
# fetch the page
page = @agent.fetch uri, :post, headers, [request_data], cur_page
add_to_history(page)
page
end
##
# Adds +page+ to the history
def add_to_history(page)
@agent.history.push(page, @agent.resolve(page.uri))
@history_added.call(page) if @history_added
end
end
require 'mechanize/content_type_error'
require 'mechanize/cookie'
require 'mechanize/cookie_jar'
require 'mechanize/parser'
require 'mechanize/download'
require 'mechanize/directory_saver'
require 'mechanize/file'
require 'mechanize/file_connection'
require 'mechanize/file_request'
require 'mechanize/file_response'
require 'mechanize/form'
require 'mechanize/history'
require 'mechanize/http'
require 'mechanize/http/agent'
require 'mechanize/http/auth_challenge'
require 'mechanize/http/auth_realm'
require 'mechanize/http/content_disposition_parser'
require 'mechanize/http/www_authenticate_parser'
require 'mechanize/image'
require 'mechanize/page'
require 'mechanize/monkey_patch'
require 'mechanize/pluggable_parsers'
require 'mechanize/redirect_limit_reached_error'
require 'mechanize/redirect_not_get_or_head_error'
require 'mechanize/response_code_error'
require 'mechanize/unauthorized_error'
require 'mechanize/response_read_error'
require 'mechanize/robots_disallowed_error'
require 'mechanize/unsupported_scheme_error'
require 'mechanize/util'