lib/mechanize.rb in mechanize-2.1 vs lib/mechanize.rb in mechanize-2.1.1

- old
+ new

@@ -2,11 +2,10 @@ require 'forwardable' require 'iconv' if RUBY_VERSION < '1.9.2' require 'mutex_m' require 'net/http/digest_auth' require 'net/http/persistent' -require 'nkf' require 'nokogiri' require 'openssl' require 'pp' require 'stringio' require 'uri' @@ -14,11 +13,11 @@ require 'zlib' ## # The Mechanize library is used for automating interactions with a website. It # can follow links and submit forms. Form fields can be populated and -# submitted. A history of URL's is maintained and can be queried. +# submitted. A history of URLs is maintained and can be queried. # # == Example # # require 'mechanize' # require 'logger' @@ -31,17 +30,51 @@ # search_form = page.form_with :name => "f" # search_form.field_with(:name => "q").value = "Hello" # # search_results = agent.submit search_form # puts search_results.body +# +# == Issues with mechanize +# +# If you think you have a bug with mechanize, but aren't sure, please file a +# ticket at https://github.com/tenderlove/mechanize/issues +# +# Here are some common problems you may experience with mechanize +# +# === Problems connecting to SSL sites +# +# Mechanize defaults to validating SSL certificates using the default CA +# certificates for your platform. At this time, Windows users do not have +# integration between the OS default CA certificates and OpenSSL. #cert_store +# explains how to download and use Mozilla's CA certificates to allow SSL +# sites to work. +# +# === Problems with content-length +# +# Some sites return an incorrect content-length value. Unlike a browser, +# mechanize raises an error when the content-length header does not match the +# response length since it does not know if there was a connection problem or +# if the mismatch is a server bug. +# +# The error raised, Mechanize::ResponseReadError, can be converted to a parsed +# Page, File, etc. depending upon the content-type: +# +# agent = Mechanize.new +# uri = URI 'http://example/invalid_content_length' +# +# begin +# page = agent.get uri +# rescue Mechanize::ResponseReadError => e +# page = e.force_parse +# end class Mechanize ## # The version of Mechanize you are using. - VERSION = '2.1' + VERSION = '2.1.1' ## # Base mechanize error class class Error < RuntimeError @@ -135,10 +168,13 @@ @html_parser = self.class.html_parser @default_encoding = nil @force_default_encoding = false + # defaults + @agent.max_history = 50 + yield self if block_given? @agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass @agent.set_http end @@ -177,10 +213,15 @@ @agent.history.max_size end ## # Sets the maximum number of items allowed in the history to +length+. + # + # Setting the maximum history length to nil will make the history size + # unlimited. Take care when doing this, mechanize stores page bodies in the + # temporary files directory for pages in the history. For a long-running + # mechanize program this can be quite large. def max_history= length @agent.history.max_size = length end @@ -516,14 +557,16 @@ attr_reader :proxy_user ## # Sets the user and password to be used for HTTP authentication. + # sets the optional domain for NTLM authentication - def auth(user, password) + def auth(user, password, domain = nil) @agent.user = user @agent.password = password + @agent.domain = domain end alias basic_auth auth ## @@ -867,17 +910,37 @@ @agent.cert = cert end ## # An OpenSSL certificate store for verifying server certificates. This - # defaults to the default certificate store. + # defaults to the default certificate store for your system. + # + # If your system does not ship with a default set of certificates you can + # retrieve a copy of the set from Mozilla here: + # http://curl.haxx.se/docs/caextract.html + # + # (Note that this set does not have an HTTPS download option so you may + # wish to use the firefox-db2pem.sh script to extract the certificates + # from a local install to avoid man-in-the-middle attacks.) + # + # After downloading or generating a cacert.pem from the above link you + # can create a certificate store from the pem file like this: + # + # cert_store = OpenSSL::X509::Store.new + # cert_store.add_file 'cacert.pem' + # + # And have mechanize use it with: + # + # agent.cert_store = cert_store def cert_store @agent.cert_store end ## # Sets the OpenSSL certificate store to +store+. + # + # See also #cert_store def cert_store= cert_store @agent.cert_store = cert_store end