lib/mechanize.rb in mechanize-2.1 vs lib/mechanize.rb in mechanize-2.1.1
- old
+ new
@@ -2,11 +2,10 @@
require 'forwardable'
require 'iconv' if RUBY_VERSION < '1.9.2'
require 'mutex_m'
require 'net/http/digest_auth'
require 'net/http/persistent'
-require 'nkf'
require 'nokogiri'
require 'openssl'
require 'pp'
require 'stringio'
require 'uri'
@@ -14,11 +13,11 @@
require 'zlib'
##
# The Mechanize library is used for automating interactions with a website. It
# can follow links and submit forms. Form fields can be populated and
-# submitted. A history of URL's is maintained and can be queried.
+# submitted. A history of URLs is maintained and can be queried.
#
# == Example
#
# require 'mechanize'
# require 'logger'
@@ -31,17 +30,51 @@
# search_form = page.form_with :name => "f"
# search_form.field_with(:name => "q").value = "Hello"
#
# search_results = agent.submit search_form
# puts search_results.body
+#
+# == Issues with mechanize
+#
+# If you think you have a bug with mechanize, but aren't sure, please file a
+# ticket at https://github.com/tenderlove/mechanize/issues
+#
+# Here are some common problems you may experience with mechanize:
+#
+# === Problems connecting to SSL sites
+#
+# Mechanize defaults to validating SSL certificates using the default CA
+# certificates for your platform. At this time, Windows users do not have
+# integration between the OS default CA certificates and OpenSSL. #cert_store
+# explains how to download and use Mozilla's CA certificates to allow SSL
+# sites to work.
+#
+# === Problems with content-length
+#
+# Some sites return an incorrect content-length value. Unlike a browser,
+# mechanize raises an error when the content-length header does not match the
+# response length since it does not know if there was a connection problem or
+# if the mismatch is a server bug.
+#
+# The error raised, Mechanize::ResponseReadError, can be converted to a parsed
+# Page, File, etc. depending upon the content-type:
+#
+# agent = Mechanize.new
+# uri = URI 'http://example/invalid_content_length'
+#
+# begin
+# page = agent.get uri
+# rescue Mechanize::ResponseReadError => e
+# page = e.force_parse
+# end
class Mechanize
##
# The version of Mechanize you are using.
- VERSION = '2.1'
+ VERSION = '2.1.1'
##
# Base mechanize error class
class Error < RuntimeError
@@ -135,10 +168,13 @@
@html_parser = self.class.html_parser
@default_encoding = nil
@force_default_encoding = false
+ # defaults
+ @agent.max_history = 50
+
yield self if block_given?
@agent.set_proxy @proxy_addr, @proxy_port, @proxy_user, @proxy_pass
@agent.set_http
end
@@ -177,10 +213,15 @@
@agent.history.max_size
end
##
# Sets the maximum number of items allowed in the history to +length+.
+ #
+ # Setting the maximum history length to nil will make the history size
+ # unlimited. Take care when doing this: mechanize stores page bodies in the
+ # temporary files directory for pages in the history, so for a long-running
+ # mechanize program the stored data can grow quite large.
def max_history= length
  # The size limit is kept on the agent's history object, so fetch that
  # object first and apply the new limit to it.
  history = @agent.history
  history.max_size = length
end
@@ -516,14 +557,16 @@
attr_reader :proxy_user
##
# Sets the user and password to be used for HTTP authentication.
+ # Also sets the optional domain for NTLM authentication.
- def auth(user, password)
+ def auth(user, password, domain = nil)
@agent.user = user
@agent.password = password
+ @agent.domain = domain
end
alias basic_auth auth
##
@@ -867,17 +910,37 @@
@agent.cert = cert
end
##
# An OpenSSL certificate store for verifying server certificates. This
- # defaults to the default certificate store.
+ # defaults to the default certificate store for your system.
+ #
+ # If your system does not ship with a default set of certificates you can
+ # retrieve a copy of the set from Mozilla here:
+ # http://curl.haxx.se/docs/caextract.html
+ #
+ # (Note that this set does not have an HTTPS download option so you may
+ # wish to use the firefox-db2pem.sh script to extract the certificates
+ # from a local install to avoid man-in-the-middle attacks.)
+ #
+ # After downloading or generating a cacert.pem from the above link you
+ # can create a certificate store from the pem file like this:
+ #
+ # cert_store = OpenSSL::X509::Store.new
+ # cert_store.add_file 'cacert.pem'
+ #
+ # And have mechanize use it with:
+ #
+ # agent.cert_store = cert_store
def cert_store
  # The store is held by the underlying agent; return whatever it has.
  agent = @agent
  agent.cert_store
end
##
# Sets the OpenSSL certificate store to +cert_store+.
+ #
+ # See also #cert_store
def cert_store= cert_store
  # Hand the given store to the underlying agent.
  agent = @agent
  agent.cert_store = cert_store
end