# frozen_string_literal: true require 'mechanize/version' require 'fileutils' require 'forwardable' require 'net/http/digest_auth' require 'net/http/persistent' require 'nokogiri' require 'openssl' require 'pp' require 'stringio' require 'uri' require 'webrick/httputils' require 'zlib' ## # The Mechanize library is used for automating interactions with a website. It # can follow links and submit forms. Form fields can be populated and # submitted. A history of URLs is maintained and can be queried. # # == Example # # require 'mechanize' # require 'logger' # # agent = Mechanize.new # agent.log = Logger.new "mech.log" # agent.user_agent_alias = 'Mac Safari' # # page = agent.get "http://www.google.com/" # search_form = page.form_with :name => "f" # search_form.field_with(:name => "q").value = "Hello" # # search_results = agent.submit search_form # puts search_results.body # # == Issues with mechanize # # If you think you have a bug with mechanize, but aren't sure, please file a # ticket at https://github.com/sparklemotion/mechanize/issues # # Here are some common problems you may experience with mechanize # # === Problems connecting to SSL sites # # Mechanize defaults to validating SSL certificates using the default CA # certificates for your platform. At this time, Windows users do not have # integration between the OS default CA certificates and OpenSSL. #cert_store # explains how to download and use Mozilla's CA certificates to allow SSL # sites to work. # # === Problems with content-length # # Some sites return an incorrect content-length value. Unlike a browser, # mechanize raises an error when the content-length header does not match the # response length since it does not know if there was a connection problem or # if the mismatch is a server bug. # # The error raised, Mechanize::ResponseReadError, can be converted to a parsed # Page, File, etc. depending upon the content-type: # # agent = Mechanize.new # uri = URI 'http://example/invalid_content_length' # # begin # page = agent.get uri # rescue Mechanize::ResponseReadError => e # page = e.force_parse # end class Mechanize ## # Base mechanize error class class Error < RuntimeError end ruby_version = if RUBY_PATCHLEVEL >= 0 then "#{RUBY_VERSION}p#{RUBY_PATCHLEVEL}" else "#{RUBY_VERSION}dev#{RUBY_REVISION}" end ## # Supported User-Agent aliases for use with user_agent_alias=. The # description in parenthesis is for informative purposes and is not part of # the alias name. # # The default User-Agent alias: # # * "Mechanize" # # Linux User-Agent aliases: # # * "Linux Firefox" # * "Linux Konqueror" # * "Linux Mozilla" # # Mac User-Agent aliases: # # * "Mac Firefox" # * "Mac Mozilla" # * "Mac Safari 4" # * "Mac Safari" # # Windows User-Agent aliases: # # * "Windows Chrome" # * "Windows Edge" # * "Windows Firefox" # * "Windows IE 6" # * "Windows IE 7" # * "Windows IE 8" # * "Windows IE 9" # * "Windows IE 10" # * "Windows IE 11" # * "Windows Mozilla" # # Mobile User-Agent aliases: # # * "Android" # * "iPad" # * "iPhone" # # Example: # # agent = Mechanize.new # agent.user_agent_alias = 'Mac Safari' # AGENT_ALIASES = { 'Mechanize' => "Mechanize/#{VERSION} Ruby/#{ruby_version} (http://github.com/sparklemotion/mechanize/)", 'Linux Firefox' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/121.0', 'Linux Konqueror' => 'Mozilla/5.0 (compatible; Konqueror/3; Linux)', 'Linux Mozilla' => 'Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4) Gecko/20030624', 'Mac Firefox' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14.2; rv:109.0) Gecko/20100101 Firefox/121.0', 'Mac Mozilla' => 'Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.4a) Gecko/20030401', 'Mac Safari 4' => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; de-at) AppleWebKit/531.21.8 (KHTML, like Gecko) Version/4.0.4 Safari/531.21.10', 'Mac Safari' => 'Mozilla/5.0 (Macintosh; Intel Mac OS X 14_2_1) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15', 'Windows Chrome' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'Windows Edge' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.2210.133', 'Windows Firefox' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/121.0', 'Windows IE 6' => 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)', 'Windows IE 7' => 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)', 'Windows IE 8' => 'Mozilla/5.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727)', 'Windows IE 9' => 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)', 'Windows IE 10' => 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0)', 'Windows IE 11' => 'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko', 'Windows Mozilla' => 'Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.4b) Gecko/20030516 Mozilla Firebird/0.6', 'Android' => 'Mozilla/5.0 (Linux; Android 10; K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.6099.210 Mobile Safari/537.36', 'iPad' => 'Mozilla/5.0 (iPad; CPU OS 17_2_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1', 'iPhone' => 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_2_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Mobile/15E148 Safari/604.1', } AGENT_ALIASES.default_proc = proc { |hash, key| case key when /FireFox/ if ua = hash[nkey = key.sub(/FireFox/, 'Firefox')] warn "Mechanize#user_agent_alias: #{key.inspect} should be spelled as #{nkey.inspect}" ua end end } def self.inherited(child) # :nodoc: child.html_parser = html_parser child.log = log super end ## # Creates a new Mechanize instance and yields it to the given block. # # After the block executes, the instance is cleaned up. This includes # closing all open connections. # # Mechanize.start do |m| # m.get("http://example.com") # end def self.start instance = new yield(instance) ensure instance.shutdown end ## # Creates a new mechanize instance. If a block is given, the created # instance is yielded to the block for setting up pre-connection state such # as SSL parameters or proxies: # # agent = Mechanize.new do |a| # a.proxy_addr = 'proxy.example' # a.proxy_port = 8080 # end # # If you need segregated SSL connections give each agent a unique # name. Otherwise the connections will be shared. This is # particularly important if you are using certificates. # # agent_1 = Mechanize.new 'conn1' # agent_2 = Mechanize.new 'conn2' # def initialize(connection_name = 'mechanize') @agent = Mechanize::HTTP::Agent.new(connection_name) @agent.context = self @log = nil # attr_accessors @agent.user_agent = AGENT_ALIASES['Mechanize'] @watch_for_set = nil @history_added = nil # attr_readers @pluggable_parser = PluggableParser.new @keep_alive_time = 0 # Proxy @proxy_addr = nil @proxy_port = nil @proxy_user = nil @proxy_pass = nil @html_parser = self.class.html_parser @default_encoding = nil @force_default_encoding = false # defaults @agent.max_history = 50 yield self if block_given? @agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass end # :section: History # # Methods for navigating and controlling history ## # Equivalent to the browser back button. Returns the previous page visited. def back @agent.history.pop end ## # Returns the latest page loaded by Mechanize def current_page @agent.current_page end alias page current_page ## # The history of this mechanize run def history @agent.history end ## # Maximum number of items allowed in the history. The default setting is 50 # pages. Note that the size of the history multiplied by the maximum # response body size def max_history @agent.history.max_size end ## # Sets the maximum number of items allowed in the history to +length+. # # Setting the maximum history length to nil will make the history size # unlimited. Take care when doing this, mechanize stores response bodies in # memory for pages and in the temporary files directory for other responses. # For a long-running mechanize program this can be quite large. # # See also the discussion under #max_file_buffer= def max_history= length @agent.history.max_size = length end ## # Returns a visited page for the +url+ passed in, otherwise nil def visited? url url = url.href if url.respond_to? :href @agent.visited_page url end ## # Returns whether or not a url has been visited alias visited_page visited? # :section: Hooks # # Hooks into the operation of mechanize ## # A list of hooks to call before reading response header 'content-encoding'. # # The hook is called with the agent making the request, the URI of the # request, the response an IO containing the response body. def content_encoding_hooks @agent.content_encoding_hooks end ## # Callback which is invoked with the page that was added to history. attr_accessor :history_added ## # A list of hooks to call after retrieving a response. Hooks are called with # the agent, the URI, the response, and the response body. def post_connect_hooks @agent.post_connect_hooks end ## # A list of hooks to call before retrieving a response. Hooks are called # with the agent, the URI, the response, and the response body. def pre_connect_hooks @agent.pre_connect_hooks end # :section: Requests # # Methods for making HTTP requests ## # If the parameter is a string, finds the button or link with the # value of the string on the current page and clicks it. Otherwise, clicks # the Mechanize::Page::Link object passed in. Returns the page fetched. def click link case link when Page::Link then referer = link.page || current_page() if @agent.robots if (referer.is_a?(Page) and referer.parser.nofollow?) or link.rel?('nofollow') then raise RobotsDisallowedError.new(link.href) end end if link.noreferrer? href = @agent.resolve(link.href, link.page || current_page) referer = Page.new else href = link.href end get href, [], referer when String, Regexp then if real_link = page.link_with(:text => link) click real_link else button = nil # Note that this will not work if we have since navigated to a different page. # Should rather make each button aware of its parent form. form = page.forms.find do |f| button = f.button_with(:value => link) button.is_a? Form::Submit end submit form, button if form end when Form::Submit, Form::ImageButton then # Note that this will not work if we have since navigated to a different page. # Should rather make each button aware of its parent form. form = page.forms.find do |f| f.buttons.include?(link) end submit form, link if form else referer = current_page() href = link.respond_to?(:href) ? link.href : (link['href'] || link['src']) get href, [], referer end end ## # GETs +uri+ and writes it to +io_or_filename+ without recording the request # in the history. If +io_or_filename+ does not respond to #write it will be # used as a file name. +parameters+, +referer+ and +headers+ are used as in # #get. # # By default, if the Content-type of the response matches a Mechanize::File # or Mechanize::Page parser, the response body will be loaded into memory # before being saved. See #pluggable_parser for details on changing this # default. # # For alternate ways of downloading files see Mechanize::FileSaver and # Mechanize::DirectorySaver. def download uri, io_or_filename, parameters = [], referer = nil, headers = {} page = transact do get uri, parameters, referer, headers end io = if io_or_filename.respond_to? :write then io_or_filename else ::File.open(io_or_filename, 'wb') end case page when Mechanize::File then io.write page.body else body_io = page.body_io until body_io.eof? do io.write body_io.read 16384 end end page ensure io.close if io and not io_or_filename.respond_to? :write end ## # DELETE +uri+ with +query_params+, and setting +headers+: # # +query_params+ is formatted into a query string using # Mechanize::Util.build_query_string, which see. # # delete('http://example/', {'q' => 'foo'}, {}) def delete(uri, query_params = {}, headers = {}) page = @agent.fetch(uri, :delete, headers, query_params) add_to_history(page) page end ## # GET the +uri+ with the given request +parameters+, +referer+ and # +headers+. # # The +referer+ may be a URI or a page. # # +parameters+ is formatted into a query string using # Mechanize::Util.build_query_string, which see. def get(uri, parameters = [], referer = nil, headers = {}) method = :get referer ||= if uri.to_s =~ %r{\Ahttps?://} Page.new else current_page || Page.new end # FIXME: Huge hack so that using a URI as a referer works. I need to # refactor everything to pass around URIs but still support # Mechanize::Page#base unless Mechanize::Parser === referer then referer = if referer.is_a?(String) then Page.new URI(referer) else Page.new referer end end # fetch the page headers ||= {} page = @agent.fetch uri, method, headers, parameters, referer add_to_history(page) yield page if block_given? page end ## # GET +url+ and return only its contents def get_file(url) get(url).body end ## # HEAD +uri+ with +query_params+ and +headers+: # # +query_params+ is formatted into a query string using # Mechanize::Util.build_query_string, which see. # # head('http://example/', {'q' => 'foo'}, {}) def head(uri, query_params = {}, headers = {}) page = @agent.fetch uri, :head, headers, query_params yield page if block_given? page end ## # POST to the given +uri+ with the given +query+. # # +query+ is processed using Mechanize::Util.each_parameter (which # see), and then encoded into an entity body. If any IO/FileUpload # object is specified as a field value the "enctype" will be # multipart/form-data, or application/x-www-form-urlencoded # otherwise. # # Examples: # agent.post 'http://example.com/', "foo" => "bar" # # agent.post 'http://example.com/', [%w[foo bar]] # # agent.post('http://example.com/', "hello", # 'Content-Type' => 'application/xml') def post(uri, query = {}, headers = {}) return request_with_entity(:post, uri, query, headers) if String === query node = {} # Create a fake form class << node def search(*args); []; end end node['method'] = 'POST' node['enctype'] = 'application/x-www-form-urlencoded' form = Form.new(node) Mechanize::Util.each_parameter(query) { |k, v| if v.is_a?(IO) form.enctype = 'multipart/form-data' ul = Form::FileUpload.new({'name' => k.to_s},::File.basename(v.path)) ul.file_data = v.read form.file_uploads << ul elsif v.is_a?(Form::FileUpload) form.enctype = 'multipart/form-data' form.file_uploads << v else form.fields << Form::Field.new({'name' => k.to_s},v) end } post_form(uri, form, headers) end ## # PUT to +uri+ with +entity+, and setting +headers+: # # put('http://example/', 'new content', {'Content-Type' => 'text/plain'}) def put(uri, entity, headers = {}) request_with_entity(:put, uri, entity, headers) end ## # Makes an HTTP request to +url+ using HTTP method +verb+. +entity+ is used # as the request body, if allowed. def request_with_entity(verb, uri, entity, headers = {}) cur_page = current_page || Page.new log.debug("query: #{ entity.inspect }") if log headers = { 'Content-Type' => 'application/octet-stream', 'Content-Length' => entity.size.to_s, }.update headers page = @agent.fetch uri, verb, headers, [entity], cur_page add_to_history(page) page end ## # Submits +form+ with an optional +button+. # # Without a button: # # page = agent.get('http://example.com') # agent.submit(page.forms.first) # # With a button: # # agent.submit(page.forms.first, page.forms.first.buttons.first) def submit(form, button = nil, headers = {}) form.add_button_to_query(button) if button case form.method.upcase when 'POST' post_form(form.action, form, headers) when 'GET' get(form.action.gsub(/\?[^\?]*$/, ''), form.build_query, form.page, headers) else raise ArgumentError, "unsupported method: #{form.method.upcase}" end end ## # Runs given block, then resets the page history as it was before. self is # given as a parameter to the block. Returns the value of the block. def transact history_backup = @agent.history.dup begin yield self ensure @agent.history = history_backup end end # :section: Settings # # Settings that adjust how mechanize makes HTTP requests including timeouts, # keep-alives, compression, redirects and headers. @html_parser = Nokogiri::HTML @log = nil class << self ## # Default HTML parser for all mechanize instances # # Mechanize.html_parser = Nokogiri::XML attr_accessor :html_parser ## # Default logger for all mechanize instances # # Mechanize.log = Logger.new $stderr attr_accessor :log end ## # A default encoding name used when parsing HTML parsing. When set it is # used after any other encoding. The default is nil. attr_accessor :default_encoding ## # Overrides the encodings given by the HTTP server and the HTML page with # the default_encoding when set to true. attr_accessor :force_default_encoding ## # The HTML parser to be used when parsing documents attr_accessor :html_parser ## # HTTP/1.0 keep-alive time. This is no longer supported by mechanize as it # now uses net-http-persistent which only supports HTTP/1.1 persistent # connections attr_accessor :keep_alive_time ## # The pluggable parser maps a response Content-Type to a parser class. The # registered Content-Type may be either a full content type like 'image/png' # or a media type 'text'. See Mechanize::PluggableParser for further # details. # # Example: # # agent.pluggable_parser['application/octet-stream'] = Mechanize::Download attr_reader :pluggable_parser ## # The HTTP proxy address attr_reader :proxy_addr ## # The HTTP proxy password attr_reader :proxy_pass ## # The HTTP proxy port attr_reader :proxy_port ## # The HTTP proxy username attr_reader :proxy_user ## # *NOTE*: These credentials will be used as a default for any challenge # exposing your password to disclosure to malicious servers. Use of this # method will warn. This method is deprecated and will be removed in # mechanize 3. # # Sets the +user+ and +password+ as the default credentials to be used for # HTTP authentication for any server. The +domain+ is used for NTLM # authentication. def auth user, password, domain = nil caller.first =~ /(.*?):(\d+).*?$/ warn <<-WARNING At #{$1} line #{$2} Use of #auth and #basic_auth are deprecated due to a security vulnerability. WARNING @agent.add_default_auth user, password, domain end alias basic_auth auth ## # Adds credentials +user+, +pass+ for +uri+. If +realm+ is set the # credentials are used only for that realm. If +realm+ is not set the # credentials become the default for any realm on that URI. # # +domain+ and +realm+ are exclusive as NTLM does not follow RFC 2617. If # +domain+ is given it is only used for NTLM authentication. def add_auth uri, user, password, realm = nil, domain = nil @agent.add_auth uri, user, password, realm, domain end ## # Are If-Modified-Since conditional requests enabled? def conditional_requests @agent.conditional_requests end ## # Disables If-Modified-Since conditional requests (enabled by default) def conditional_requests= enabled @agent.conditional_requests = enabled end ## # A Mechanize::CookieJar which stores cookies def cookie_jar @agent.cookie_jar end ## # Replaces the cookie jar with +cookie_jar+ def cookie_jar= cookie_jar @agent.cookie_jar = cookie_jar end ## # Returns a list of cookies stored in the cookie jar. def cookies @agent.cookie_jar.to_a end ## # Follow HTML meta refresh and HTTP Refresh headers. If set to +:anywhere+ # meta refresh tags outside of the head element will be followed. def follow_meta_refresh @agent.follow_meta_refresh end ## # Controls following of HTML meta refresh and HTTP Refresh headers in # responses. def follow_meta_refresh= follow @agent.follow_meta_refresh = follow end ## # Follow an HTML meta refresh and HTTP Refresh headers that have no "url=" # in the content attribute. # # Defaults to false to prevent infinite refresh loops. def follow_meta_refresh_self @agent.follow_meta_refresh_self end ## # Alters the following of HTML meta refresh and HTTP Refresh headers that # point to the same page. def follow_meta_refresh_self= follow @agent.follow_meta_refresh_self = follow end ## # Is gzip compression of responses enabled? def gzip_enabled @agent.gzip_enabled end ## # Disables HTTP/1.1 gzip compression (enabled by default) def gzip_enabled=enabled @agent.gzip_enabled = enabled end ## # Connections that have not been used in this many seconds will be reset. def idle_timeout @agent.idle_timeout end # Sets the idle timeout to +idle_timeout+. The default timeout is 5 # seconds. If you experience "too many connection resets", reducing this # value may help. def idle_timeout= idle_timeout @agent.idle_timeout = idle_timeout end ## # When set to true mechanize will ignore an EOF during chunked transfer # encoding so long as at least one byte was received. Be careful when # enabling this as it may cause data loss. # # Net::HTTP does not inform mechanize of where in the chunked stream the EOF # occurred. Usually it is after the last-chunk but before the terminating # CRLF (invalid termination) but it may occur earlier. In the second case # your response body may be incomplete. def ignore_bad_chunking @agent.ignore_bad_chunking end ## # When set to true mechanize will ignore an EOF during chunked transfer # encoding. See ignore_bad_chunking for further details def ignore_bad_chunking= ignore_bad_chunking @agent.ignore_bad_chunking = ignore_bad_chunking end ## # Are HTTP/1.1 keep-alive connections enabled? def keep_alive @agent.keep_alive end ## # Disable HTTP/1.1 keep-alive connections if +enable+ is set to false. If # you are experiencing "too many connection resets" errors setting this to # false will eliminate them. # # You should first investigate reducing idle_timeout. def keep_alive= enable @agent.keep_alive = enable end ## # The current logger. If no logger has been set Mechanize.log is used. def log @log || Mechanize.log end ## # Sets the +logger+ used by this instance of mechanize def log= logger @log = logger end ## # Responses larger than this will be written to a Tempfile instead of stored # in memory. The default is 100,000 bytes. # # A value of nil disables creation of Tempfiles. def max_file_buffer @agent.max_file_buffer end ## # Sets the maximum size of a response body that will be stored in memory to # +bytes+. A value of nil causes all response bodies to be stored in # memory. # # Note that for Mechanize::Download subclasses, the maximum buffer size # multiplied by the number of pages stored in history (controlled by # #max_history) is an approximate upper limit on the amount of memory # Mechanize will use. By default, Mechanize can use up to ~5MB to store # response bodies for non-File and non-Page (HTML) responses. # # See also the discussion under #max_history= def max_file_buffer= bytes @agent.max_file_buffer = bytes end ## # Length of time to wait until a connection is opened in seconds def open_timeout @agent.open_timeout end ## # Sets the connection open timeout to +open_timeout+ def open_timeout= open_timeout @agent.open_timeout = open_timeout end ## # Length of time to wait for data from the server def read_timeout @agent.read_timeout end ## # Sets the timeout for each chunk of data read from the server to # +read_timeout+. A single request may read many chunks of data. def read_timeout= read_timeout @agent.read_timeout = read_timeout end ## # Controls how mechanize deals with redirects. The following values are # allowed: # # :all, true:: All 3xx redirects are followed (default) # :permanent:: Only 301 Moved Permanently redirects are followed # false:: No redirects are followed def redirect_ok @agent.redirect_ok end alias follow_redirect? redirect_ok ## # Sets the mechanize redirect handling policy. See redirect_ok for allowed # values def redirect_ok= follow @agent.redirect_ok = follow end alias follow_redirect= redirect_ok= ## # Maximum number of redirections to follow def redirection_limit @agent.redirection_limit end ## # Sets the maximum number of redirections to follow to +limit+ def redirection_limit= limit @agent.redirection_limit = limit end ## # Resolve the full path of a link / uri def resolve link @agent.resolve link end ## # A hash of custom request headers that will be sent on every request def request_headers @agent.request_headers end ## # Replaces the custom request headers that will be sent on every request # with +request_headers+ def request_headers= request_headers @agent.request_headers = request_headers end ## # Retry POST and other non-idempotent requests. See RFC 2616 9.1.2. def retry_change_requests @agent.retry_change_requests end ## # When setting +retry_change_requests+ to true you are stating that, for all # the URLs you access with mechanize, making POST and other non-idempotent # requests is safe and will not cause data duplication or other harmful # results. # # If you are experiencing "too many connection resets" errors you should # instead investigate reducing the idle_timeout or disabling keep_alive # connections. def retry_change_requests= retry_change_requests @agent.retry_change_requests = retry_change_requests end ## # Will /robots.txt files be obeyed? def robots @agent.robots end ## # When +enabled+ mechanize will retrieve and obey robots.txt # files def robots= enabled @agent.robots = enabled end ## # The handlers for HTTP and other URI protocols. def scheme_handlers @agent.scheme_handlers end ## # Replaces the URI scheme handler table with +scheme_handlers+ def scheme_handlers= scheme_handlers @agent.scheme_handlers = scheme_handlers end ## # The identification string for the client initiating a web request def user_agent @agent.user_agent end ## # Sets the User-Agent used by mechanize to +user_agent+. See also # user_agent_alias def user_agent= user_agent @agent.user_agent = user_agent end ## # Set the user agent for the Mechanize object based on the given +name+. # # See also AGENT_ALIASES def user_agent_alias= name self.user_agent = AGENT_ALIASES[name] || raise(ArgumentError, "unknown agent alias #{name.inspect}") end ## # The value of watch_for_set is passed to pluggable parsers for retrieved # content attr_accessor :watch_for_set # :section: SSL # # SSL settings for mechanize. These must be set in the block given to # Mechanize.new ## # Path to an OpenSSL server certificate file def ca_file @agent.ca_file end ## # Sets the certificate file used for SSL connections def ca_file= ca_file @agent.ca_file = ca_file end ## # An OpenSSL client certificate or the path to a certificate file. def cert @agent.certificate end ## # Sets the OpenSSL client certificate +cert+ to the given path or # certificate instance def cert= cert @agent.certificate = cert end ## # An OpenSSL certificate store for verifying server certificates. This # defaults to the default certificate store for your system. # # If your system does not ship with a default set of certificates you can # retrieve a copy of the set from Mozilla here: # http://curl.haxx.se/docs/caextract.html # # (Note that this set does not have an HTTPS download option so you may # wish to use the firefox-db2pem.sh script to extract the certificates # from a local install to avoid man-in-the-middle attacks.) # # After downloading or generating a cacert.pem from the above link you # can create a certificate store from the pem file like this: # # cert_store = OpenSSL::X509::Store.new # cert_store.add_file 'cacert.pem' # # And have mechanize use it with: # # agent.cert_store = cert_store def cert_store @agent.cert_store end ## # Sets the OpenSSL certificate store to +store+. # # See also #cert_store def cert_store= cert_store @agent.cert_store = cert_store end ## # What is this? # # Why is it different from #cert? def certificate # :nodoc: @agent.certificate end ## # An OpenSSL private key or the path to a private key def key @agent.private_key end ## # Sets the OpenSSL client +key+ to the given path or key instance. If a # path is given, the path must contain an RSA key file. def key= key @agent.private_key = key end ## # OpenSSL client key password def pass @agent.pass end ## # Sets the client key password to +pass+ def pass= pass @agent.pass = pass end ## # SSL version to use. def ssl_version @agent.ssl_version end ## # Sets the SSL version to use to +version+ without client/server # negotiation. def ssl_version= ssl_version @agent.ssl_version = ssl_version end ## # A callback for additional certificate verification. See # OpenSSL::SSL::SSLContext#verify_callback # # The callback can be used for debugging or to ignore errors by always # returning +true+. Specifying nil uses the default method that was valid # when the SSLContext was created def verify_callback @agent.verify_callback end ## # Sets the OpenSSL certificate verification callback def verify_callback= verify_callback @agent.verify_callback = verify_callback end ## # the OpenSSL server certificate verification method. The default is # OpenSSL::SSL::VERIFY_PEER and certificate verification uses the default # system certificates. See also cert_store def verify_mode @agent.verify_mode end ## # Sets the OpenSSL server certificate verification method. def verify_mode= verify_mode @agent.verify_mode = verify_mode end # :section: Utilities attr_reader :agent # :nodoc: ## # Parses the +body+ of the +response+ from +uri+ using the pluggable parser # that matches its content type def parse uri, response, body content_type = nil unless response['Content-Type'].nil? data, = response['Content-Type'].split ';', 2 content_type, = data.downcase.split ',', 2 unless data.nil? end parser_klass = @pluggable_parser.parser content_type unless parser_klass <= Mechanize::Download then body = case body when IO, Tempfile, StringIO then body.read else body end end parser_klass.new uri, response, body, response.code do |parser| parser.mech = self if parser.respond_to? :mech= parser.watch_for_set = @watch_for_set if @watch_for_set and parser.respond_to?(:watch_for_set=) end end def pretty_print(q) # :nodoc: q.object_group(self) { q.breakable q.pp cookie_jar q.breakable q.pp current_page } end ## # Sets the proxy +address+ at +port+ with an optional +user+ and +password+ def set_proxy address, port, user = nil, password = nil @proxy_addr = address @proxy_port = port @proxy_user = user @proxy_pass = password @agent.set_proxy address, port, user, password end ## # Clears history and cookies. def reset @agent.reset end ## # Shuts down this session by clearing browsing state and closing all # persistent connections. def shutdown reset @agent.shutdown end private ## # Posts +form+ to +uri+ def post_form(uri, form, headers = {}) cur_page = form.page || current_page || Page.new request_data = form.request_data log.debug("query: #{ request_data.inspect }") if log headers = { 'Content-Type' => form.enctype, 'Content-Length' => request_data.size.to_s, }.merge headers # fetch the page page = @agent.fetch uri, :post, headers, [request_data], cur_page add_to_history(page) page end ## # Adds +page+ to the history def add_to_history(page) @agent.history.push(page, @agent.resolve(page.uri)) @history_added.call(page) if @history_added end end require 'mechanize/element_not_found_error' require 'mechanize/response_read_error' require 'mechanize/chunked_termination_error' require 'mechanize/content_type_error' require 'mechanize/cookie' require 'mechanize/cookie_jar' require 'mechanize/parser' require 'mechanize/download' require 'mechanize/directory_saver' require 'mechanize/file' require 'mechanize/file_connection' require 'mechanize/file_request' require 'mechanize/file_response' require 'mechanize/form' require 'mechanize/history' require 'mechanize/http' require 'mechanize/http/agent' require 'mechanize/http/auth_challenge' require 'mechanize/http/auth_realm' require 'mechanize/http/content_disposition_parser' require 'mechanize/http/www_authenticate_parser' require 'mechanize/image' require 'mechanize/page' require 'mechanize/pluggable_parsers' require 'mechanize/redirect_limit_reached_error' require 'mechanize/redirect_not_get_or_head_error' require 'mechanize/response_code_error' require 'mechanize/robots_disallowed_error' require 'mechanize/unauthorized_error' require 'mechanize/unsupported_scheme_error' require 'mechanize/util'