lib/mechanize/http/agent.rb in mechanize-2.1.1 vs lib/mechanize/http/agent.rb in mechanize-2.2
- old
+ new
@@ -76,41 +76,15 @@
# When true, this agent will consult the site's robots.txt for each access.
attr_reader :robots
# :section: SSL
- # Path to an OpenSSL server certificate file
- attr_accessor :ca_file
-
- # An OpenSSL private key or the path to a private key
- attr_accessor :key
-
- # An OpenSSL client certificate or the path to a certificate file.
- attr_accessor :cert
-
- # An SSL certificate store
- attr_accessor :cert_store
-
# OpenSSL key password
attr_accessor :pass
- # A callback for additional certificate verification. See
- # OpenSSL::SSL::SSLContext#verify_callback
- #
- # The callback can be used for debugging or to ignore errors by always
- # returning +true+. Specifying nil uses the default method that was valid
- # when the SSLContext was created
- attr_accessor :verify_callback
-
- # How to verify SSL connections. Defaults to VERIFY_PEER
- attr_accessor :verify_mode
-
# :section: Timeouts
- # Reset connections that have not been used in this many seconds
- attr_reader :idle_timeout
-
# Set to false to disable HTTP/1.1 keep-alive requests
attr_accessor :keep_alive
# Length of time to wait until a connection is opened in seconds
attr_accessor :open_timeout
@@ -121,16 +95,10 @@
# :section:
# The cookies for this agent
attr_accessor :cookie_jar
- # URI for a proxy connection
- attr_reader :proxy_uri
-
- # Retry non-idempotent requests?
- attr_reader :retry_change_requests
-
# Responses larger than this will be written to a Tempfile instead of stored
# in memory.
attr_accessor :max_file_buffer
# :section: Utility
@@ -155,23 +123,19 @@
@cookie_jar = Mechanize::CookieJar.new
@follow_meta_refresh = false
@follow_meta_refresh_self = false
@gzip_enabled = true
@history = Mechanize::History.new
- @idle_timeout = 5
@keep_alive = true
- @keep_alive_time = 300
@max_file_buffer = 10240
@open_timeout = nil
@post_connect_hooks = []
@pre_connect_hooks = []
- @proxy_uri = nil
@read_timeout = nil
@redirect_ok = true
@redirection_limit = 20
@request_headers = {}
- @retry_change_requests = false
@robots = false
@user_agent = nil
@webrobots = nil
# HTTP Authentication
@@ -186,17 +150,11 @@
@password = nil # HTTP auth password
@user = nil # HTTP auth user
@domain = nil # NTLM HTTP domain
# SSL
- @ca_file = nil
- @cert = nil
- @cert_store = nil
- @key = nil
- @pass = nil
- @verify_callback = nil
- @verify_mode = nil
+ @pass = nil
@scheme_handlers = Hash.new { |h, scheme|
h[scheme] = lambda { |link, page|
raise Mechanize::UnsupportedSchemeError, scheme
}
@@ -204,10 +162,14 @@
@scheme_handlers['http'] = lambda { |link, page| link }
@scheme_handlers['https'] = @scheme_handlers['http']
@scheme_handlers['relative'] = @scheme_handlers['http']
@scheme_handlers['file'] = @scheme_handlers['http']
+
+ @http = Net::HTTP::Persistent.new 'mechanize'
+ @http.idle_timeout = 5
+ @http.keep_alive = 300
end
# Retrieves +uri+ and parses it into a page or other object according to
# PluggableParser. If the URI is an HTTP or HTTPS scheme URI the given HTTP
# +method+ is used to retrieve it, along with the HTTP +headers+, request
@@ -271,11 +233,12 @@
res
}
hook_content_encoding response, uri, response_body_io
- response_body_io = response_content_encoding response, response_body_io
+ response_body_io = response_content_encoding response, response_body_io if
+ request.response_body_permitted?
post_connect uri, response, response_body_io
page = response_parse response, response_body_io, uri
@@ -304,15 +267,25 @@
else
raise Mechanize::ResponseCodeError.new(page), "Unhandled response"
end
end
+ # URI for a proxy connection, as reported by the underlying @http connection
+ #
+ def proxy_uri
+ @http.proxy_uri
+ end
+
+ # Retry non-idempotent requests?  The answer is read from @http.
+ def retry_change_requests
+ @http.retry_change_requests
+ end
+
# Sets whether non-idempotent requests are retried; +retri+ is handed to @http
def retry_change_requests= retri
- @retry_change_requests = retri
- @http.retry_change_requests = retri if @http
+ @http.retry_change_requests = retri
end
# :section: Headers
def user_agent= user_agent
@@ -566,61 +539,71 @@
def request_user_agent request
request['User-Agent'] = @user_agent if @user_agent
end
def resolve(uri, referer = current_page)
- uri = uri.dup if uri.is_a?(URI)
+ referer_uri = referer && referer.uri
+ if uri.is_a?(URI)
+ uri = uri.dup
+ elsif uri.nil?
+ if referer_uri
+ return referer_uri
+ end
+ raise ArgumentError, "absolute URL needed (not nil)"
+ else
+ url = uri.to_s.strip
+ if url.empty?
+ if referer_uri
+ return referer_uri.dup.tap { |u| u.fragment = nil }
+ end
+ raise ArgumentError, "absolute URL needed (not #{uri.inspect})"
+ end
- unless uri.is_a?(URI)
- uri = uri.to_s.strip.gsub(/[^#{0.chr}-#{126.chr}]/o) { |match|
+ url.gsub!(/[^#{0.chr}-#{126.chr}]/o) { |match|
if RUBY_VERSION >= "1.9.0"
Mechanize::Util.uri_escape(match)
else
sprintf('%%%X', match.unpack($KCODE == 'UTF8' ? 'U' : 'C')[0])
end
}
- unescaped = uri.split(/(?:%[0-9A-Fa-f]{2})+|#/)
- escaped = uri.scan(/(?:%[0-9A-Fa-f]{2})+|#/)
-
- escaped_uri = Mechanize::Util.html_unescape(
- unescaped.zip(escaped).map { |x,y|
+ escaped_url = Mechanize::Util.html_unescape(
+ url.split(/((?:%[0-9A-Fa-f]{2})+|#)/).each_slice(2).map { |x, y|
"#{WEBrick::HTTPUtils.escape(x)}#{y}"
}.join('')
)
begin
- uri = URI.parse(escaped_uri)
+ uri = URI.parse(escaped_url)
rescue
- uri = URI.parse(WEBrick::HTTPUtils.escape(escaped_uri))
+ uri = URI.parse(WEBrick::HTTPUtils.escape(escaped_url))
end
end
scheme = uri.relative? ? 'relative' : uri.scheme.downcase
uri = @scheme_handlers[scheme].call(uri, referer)
- if referer && referer.uri
+ if referer_uri
if uri.path.length == 0 && uri.relative?
- uri.path = referer.uri.path
+ uri.path = referer_uri.path
end
end
uri.path = '/' if uri.path.length == 0
if uri.relative?
raise ArgumentError, "absolute URL needed (not #{uri})" unless
- referer && referer.uri
+ referer_uri
- base = nil
- if referer.respond_to?(:bases) && referer.parser
- base = referer.bases.last
+ if referer.respond_to?(:bases) && referer.parser &&
+ (lbase = referer.bases.last) && lbase.uri && lbase.uri.absolute?
+ base = lbase
+ else
+ base = nil
end
- uri = ((base && base.uri && base.uri.absolute?) ?
- base.uri :
- referer.uri) + uri
- uri = referer.uri + uri
+ uri = referer_uri + (base ? base.uri : referer_uri) + uri
# Strip initial "/.." bits from the path
uri.path.sub!(/^(\/\.\.)+(?=\/)/, '')
end
unless ['http', 'https', 'file'].include?(uri.scheme.downcase)
@@ -789,11 +772,12 @@
}
end
def response_follow_meta_refresh response, uri, page, redirects
delay, new_url = get_meta_refresh(response, uri, page)
- return nil unless new_url
+ return nil unless delay
+ new_url = new_url ? resolve(new_url, page) : uri
raise Mechanize::RedirectLimitReachedError.new(page, redirects) if
redirects + 1 > @redirection_limit
sleep delay
@@ -886,13 +870,12 @@
raise Mechanize::RedirectLimitReachedError.new(page, redirects) if
redirects + 1 > @redirection_limit
redirect_method = method == :head ? :head : :get
- from_uri = page.uri
- @history.push(page, from_uri)
- new_uri = from_uri + response['Location'].to_s
+ @history.push(page, page.uri)
+ new_uri = resolve response['Location'].to_s, page
fetch new_uri, redirect_method, {}, [], referer, redirects + 1
end
# :section: Robots
@@ -947,20 +930,108 @@
@webrobots ||= WebRobots.new(@user_agent, :http_get => method(:get_robots))
end
# :section: SSL
+ # Path to an OpenSSL CA certificate file, as currently set on @http
+ def ca_file
+ @http.ca_file
+ end
+
+ # Sets the path to an OpenSSL CA certificate file; the value is stored on @http
+ def ca_file= ca_file
+ @http.ca_file = ca_file
+ end
+
+ # The SSL certificate store used for validating connections, read from @http
+ def cert_store
+ @http.cert_store
+ end
+
+ # Sets the SSL certificate store used for validating connections (stored on @http)
+ def cert_store= cert_store
+ @http.cert_store = cert_store
+ end
+
+ # The client X509 certificate currently set on @http
def certificate
@http.certificate
end
+ # Sets the client certificate to the given X509 certificate.  Any value that is
+ # not an OpenSSL::X509::Certificate is treated as a path: the file is read and parsed.
+ def certificate= certificate
+ certificate = if OpenSSL::X509::Certificate === certificate then
+ certificate
+ else
+ OpenSSL::X509::Certificate.new File.read certificate # NOTE(review): the removed set_http used ::File.read -- confirm bare File cannot resolve to a namespaced File class here
+ end
+
+ @http.certificate = certificate
+ end
+
+ # The client's OpenSSL private key as held by @http (private_key= loads paths into key objects before storing)
+ def private_key
+ @http.private_key
+ end
+
+ # Sets the client's private key from an OpenSSL::PKey::PKey object or a path to a key file
+ def private_key= private_key
+ private_key = if OpenSSL::PKey::PKey === private_key then
+ private_key
+ else # not a key object: treat the value as a path to a key file
+ OpenSSL::PKey::RSA.new File.read(private_key), @pass # file contents are parsed as an RSA key; @pass is passed as the key password
+ end
+
+ @http.private_key = private_key
+ end
+
+ # SSL version to use, read from @http
+ def ssl_version
+ @http.ssl_version
+ end if RUBY_VERSION > '1.9' # defined only when RUBY_VERSION sorts after '1.9' as a string (e.g. '1.9.2', not '1.8.7')
+
+ # Sets the SSL version to use; the value is stored on @http
+ def ssl_version= ssl_version
+ @http.ssl_version = ssl_version
+ end if RUBY_VERSION > '1.9' # defined only when RUBY_VERSION sorts after '1.9' as a string (e.g. '1.9.2', not '1.8.7')
+
+ # A callback for additional certificate verification.  See
+ # OpenSSL::SSL::SSLContext#verify_callback
+ #
+ # The callback can be used for debugging or to ignore errors by always
+ # returning +true+.  Specifying nil uses the default method that was valid
+ # when the SSLContext was created.
+ def verify_callback # the callback is read from @http
+ @http.verify_callback
+ end
+
+ # Sets the certificate verify callback; the value is stored on @http
+ def verify_callback= verify_callback
+ @http.verify_callback = verify_callback
+ end
+
+ # How to verify SSL connections, read from @http.  Defaults to VERIFY_PEER (NOTE(review): default comes from @http, not this file -- confirm)
+ def verify_mode
+ @http.verify_mode
+ end
+
+ # Sets the mode for verifying SSL connections; the value is stored on @http
+ def verify_mode= verify_mode
+ @http.verify_mode = verify_mode
+ end
+
# :section: Timeouts
- # Sets the conection idle timeout for persistent connections
+ # Reset connections that have not been used in this many seconds (read from @http)
+ def idle_timeout
+ @http.idle_timeout
+ end
+
+ # Sets the connection idle timeout for persistent connections; +timeout+ is stored on @http
def idle_timeout= timeout
- @idle_timeout = timeout
- @http.idle_timeout = timeout if @http
+ @http.idle_timeout = timeout
end
# :section: Utility
def inflate compressed, window_bits = nil
@@ -978,52 +1049,22 @@
# Forwards to @context.log and returns its result
def log
@context.log
end
- def set_http
- @http = Net::HTTP::Persistent.new 'mechanize', @proxy_uri
-
- @http.keep_alive = @keep_alive_time
- @http.idle_timeout = @idle_timeout if @idle_timeout
- @http.retry_change_requests = @retry_change_requests
-
- @http.ca_file = @ca_file
- @http.cert_store = @cert_store if @cert_store
- @http.verify_callback = @verify_callback
- @http.verify_mode = @verify_mode if @verify_mode
-
- # update our cached value
- @verify_mode = @http.verify_mode
- @cert_store = @http.cert_store
-
- if @cert and @key then
- cert = if OpenSSL::X509::Certificate === @cert then
- @cert
- else
- OpenSSL::X509::Certificate.new ::File.read @cert
- end
-
- key = if OpenSSL::PKey::PKey === @key then
- @key
- else
- OpenSSL::PKey::RSA.new ::File.read(@key), @pass
- end
-
- @http.certificate = cert
- @http.private_key = key
- end
- end
-
##
# Sets the proxy address, port, user, and password.  +addr+ should be a host,
# with no "http://"; +port+ may be a port number, service name or port
# number string.
- def set_proxy(addr, port, user = nil, pass = nil)
- return unless addr and port
+ def set_proxy addr, port, user = nil, pass = nil
+ unless addr and port then
+ @http.proxy = nil
+ return
+ end
+
unless Integer === port then
begin
port = Socket.getservbyname port
rescue SocketError
begin
@@ -1032,15 +1073,15 @@
raise ArgumentError, "invalid value for port: #{port.inspect}"
end
end
end
- @proxy_uri = URI "http://#{addr}"
- @proxy_uri.port = port
- @proxy_uri.user = user if user
- @proxy_uri.password = pass if pass
+ proxy_uri = URI "http://#{addr}"
+ proxy_uri.port = port
+ proxy_uri.user = user if user
+ proxy_uri.password = pass if pass
- @proxy_uri
+ @http.proxy = proxy_uri
end
end