require 'tap/http/utils'
require 'net/http'
require 'thread'
require 'uri'

module Tap
  module Http
    # :startdoc::manifest submits an http request
    #
    # Dispatch is a base class for submitting HTTP requests from a request
    # hash.  Multiple requests may be submitted on individual threads, up
    # to a configurable limit.
    #
    # Request hashes are like the following:
    #
    #   request_method: GET
    #   url: http://tap.rubyforge.org/
    #   headers: {}
    #   params: {}
    #   version: 1.1
    #
    # Missing fields are added from the task configuration.  Note that since
    # Dispatch takes hash inputs, it is often convenient to save requests in
    # a .yml file and sequence dispatch with load:
    #
    #   [requests.yml]
    #   - url: http://tap.rubyforge.org/
    #   - url: http://tap.rubyforge.org/about.html
    #
    #   % rap load requests.yml --:i dispatch --+ dump
    #
    # :startdoc::manifest-end
    # === Dispatch Methods
    #
    # Dispatch itself provides methods for constructing and submitting get and
    # post HTTP requests from a request hash.
    #
    #   res = Tap::Http::Dispatch.submit(
    #     :url => "http://tap.rubyforge.org",
    #     :version => '1.1',
    #     :request_method => 'GET',
    #     :headers => {},
    #     :params => {}
    #   )
    #   res.inspect                            # => "#<Net::HTTPOK 200 OK readbody=true>"
    #   res.body =~ /Tap/                      # => true
    #
    # Headers and parameters take the form:
    #
    #   {
    #     'single' => 'value',
    #     'multiple' => ['value one', 'value two']
    #   }
    #
    # To capture request hashes from web forms using Firefox, see the README.
    class Dispatch < Tap::Task
      class << self

        # Builds a new instance from args, extends it with
        # Support::Intern(:process_response) and stores the block as its
        # process_response_block (presumably so that process_response is
        # implemented by the block -- see Support::Intern).  Returns the
        # instance.
        def intern(*args, &block)
          instance = new(*args)
          instance.extend Support::Intern(:process_response)
          instance.process_response_block = block
          instance
        end

        # Constructs and submits an http request to the url using the request hash.
        # Request hashes are like this:
        #
        #   {
        #     :url => "http://tap.rubyforge.org",
        #     :version => '1.1',
        #     :request_method => 'GET',
        #     :headers => {},
        #     :params => {}
        #   }
        #
        # If left unspecified, the default configuration values will be used (but
        # note that since the default url is nil, a url MUST be specified).
        # Headers and parameters can use array values to specify multiple values
        # for the same key.
        #
        # Submit only supports get and post request methods; see construct_get and
        # construct_post for more details.  A block may be given to receive the
        # Net::HTTP and request just prior to submission.
        #
        # When the request hash specifies a :redirection_limit, redirections are
        # followed using fetch_redirection.
        #
        # Raises an ArgumentError if no url is specified, or if the request
        # method or HTTP version is unsupported.
        #
        # Returns the Net::HTTP response.
        #
        def submit(request_hash)
          url_or_uri = request_hash[:url] || configurations[:url].default
          headers = request_hash[:headers] || configurations[:headers].default
          params = request_hash[:params] || configurations[:params].default
          request_method = request_hash[:request_method] || configurations[:request_method].default
          version = request_hash[:version] || configurations[:version].default

          raise ArgumentError, "no url specified" unless url_or_uri

          # work on a copy so that a URI supplied by the caller is not modified
          # when the path is defaulted below
          uri = url_or_uri.kind_of?(URI) ? url_or_uri.dup : URI.parse(url_or_uri)
          uri.path = "/" if uri.path.empty?

          # construct the request based on the method
          request = case request_method.to_s
          when /^get$/i  then construct_get(uri, headers, params)
          when /^post$/i then construct_post(uri, headers, params)
          else
            raise ArgumentError, "unsupported request method: #{request_method}"
          end

          # set the http version
          version_method = "version_#{version.to_s.gsub(".", "_")}".to_sym
          if ::Net::HTTP.respond_to?(version_method)
            ::Net::HTTP.send(version_method)
          else
            raise ArgumentError, "unsupported HTTP version: #{version}"
          end

          # submit the request
          res = ::Net::HTTP.new(uri.host, uri.port).start do |http|
            yield(http, request) if block_given?
            http.request(request)
          end

          # fetch redirections
          redirection_limit = request_hash[:redirection_limit]
          redirection_limit ? fetch_redirection(res, redirection_limit) : res
        end

        # Constructs a Net::HTTP::Post query, setting headers and parameters.
        #
        # ==== Supported Content Types:
        #
        # - application/x-www-form-urlencoded (the default)
        # - multipart/form-data
        #
        # The multipart/form-data content type may specify a boundary.  If no
        # boundary is specified, a randomly generated boundary will be used
        # to delimit the parameters.
        #
        #   post = construct_post(
        #     URI.parse('http://some.url/'),
        #     {:content_type => 'multipart/form-data; boundary=1234'},
        #     {:key => 'value'})
        #
        #   post.body
        #   # => %Q{--1234\r
        #   # Content-Disposition: form-data; name="key"\r
        #   # \r
        #   # value\r
        #   # --1234--\r
        #   # }
        #
        # (Note the carriage returns are required in multipart content)
        #
        # The content-length header is determined automatically from the
        # formatted request body; manually specified content-length headers
        # will be overridden.
        #
        # Raises an ArgumentError for any other Content-Type.
        #
        def construct_post(uri, headers, params)
          req = ::Net::HTTP::Post.new( escape_uri("#{uri.path}#{format_query(uri)}") )
          headers = headerize_keys(headers)
          content_type = headers['Content-Type']

          case content_type
          when nil, /^application\/x-www-form-urlencoded$/i
            req.body = format_www_form_urlencoded(params)
            headers['Content-Type'] ||= "application/x-www-form-urlencoded"

          when /^multipart\/form-data(;\s*boundary=(.*))?$/i
            # extract the boundary if it exists
            boundary = $2 || rand.to_s[2..20]

            req.body = format_multipart_form_data(params, boundary)
            headers['Content-Type'] = "multipart/form-data; boundary=#{boundary}"

          else
            raise ArgumentError, "unsupported Content-Type for POST: #{content_type}"
          end

          # Content-Length counts bytes, not characters; the two differ
          # whenever the body contains multi-byte characters.
          headers['Content-Length'] = req.body.bytesize.to_s

          headers.each_pair { |name, field| req[name] = field }
          req
        end

        # Constructs a Net::HTTP::Get query.  All parameters in uri and params are
        # encoded and added to the request URI.
        #
        #   get = construct_get(URI.parse('http://some.url/path'), {}, {:key => 'value'})
        #   get.path          # => "/path?key=value"
        #
        def construct_get(uri, headers, params)
          req = ::Net::HTTP::Get.new( escape_uri("#{uri.path}#{format_query(uri, params)}") )
          headerize_keys(headers).each_pair { |name, field| req[name] = field }
          req
        end

        # Checks the type of the response; if it is a redirection, fetches the
        # redirection.  Otherwise return the response.
        #
        # Notes:
        # - Fetch will recurse up to the input redirection limit (default 10)
        # - Responses that are not Net::HTTPRedirection or Net::HTTPSuccess
        #   raise an error.
        def fetch_redirection(res, limit=10)
          raise 'exceeded the redirection limit' if limit < 1

          case res
          when ::Net::HTTPRedirection
            redirect = ::Net::HTTP.get_response( URI.parse(res['location']) )
            fetch_redirection(redirect, limit - 1)
          when ::Net::HTTPSuccess
            res
          else
            # error! raises the exception for the response itself, so
            # no additional raise is required
            res.error!
          end
        end

        # Constructs a URI query string from the uri and the input parameters.
        # Multiple values for a parameter may be specified using an array.
        # The query is not encoded, so you may need to URI.encode it later.
        #
        #   format_query(URI.parse('http://some.url/path'), {:key => 'value'})
        #   # => "?key=value"
        #
        #   format_query(URI.parse('http://some.url/path?one=1'), {:two => '2'})
        #   # => "?one=1&two=2"
        #
        def format_query(uri, params={})
          query = []
          query << uri.query if uri.query
          query.concat query_pairs(params)

          "#{query.empty? ? '' : '?'}#{query.join('&')}"
        end

        # Formats params as 'application/x-www-form-urlencoded' for use as the
        # body of a post request.  Multiple values for a parameter may be
        # specified using an array.  The result is obviously URI encoded.
        #
        #   format_www_form_urlencoded(:key => 'value with spaces')
        #   # => "key=value%20with%20spaces"
        #
        # NOTE(review): the joined string is encoded as a whole, so reserved
        # characters such as '&' and '=' inside a key or value are not escaped.
        #
        def format_www_form_urlencoded(params={})
          escape_uri( query_pairs(params).join('&') )
        end

        # Formats params as 'multipart/form-data' using the specified boundary,
        # for use as the body of a post request.  Multiple values for a parameter
        # may be specified using an array.  All newlines include a carriage
        # return for proper formatting.
        #
        #   format_multipart_form_data(:key => 'value')
        #   # => %Q{--1234567890\r
        #   # Content-Disposition: form-data; name="key"\r
        #   # \r
        #   # value\r
        #   # --1234567890--\r
        #   # }
        #
        # To specify a file, use a hash of file-related headers.  The file
        # named by 'Filename' is read to provide the content when it exists;
        # otherwise the content is empty.
        #
        #   format_multipart_form_data(:key => {
        #     'Content-Type' => 'text/plain',
        #     'Filename' => "path/to/file.txt"}
        #   )
        #   # => %Q{--1234567890\r
        #   # Content-Disposition: form-data; name="key"; filename="path/to/file.txt"\r
        #   # Content-Type: text/plain\r
        #   # \r
        #   # \r
        #   # --1234567890--\r
        #   # }
        #
        def format_multipart_form_data(params, boundary="1234567890")
          parts = []
          params.each_pair do |key, values|
            values = [values] unless values.kind_of?(Array)

            values.each do |value|
              parts << case value
              when Hash
                file_headers = headerize_keys(value)

                filename = file_headers.delete('Filename') || ""
                content = File.exist?(filename) ? File.read(filename) : ""

                header = "Content-Disposition: form-data; name=\"#{key.to_s}\"; filename=\"#{filename}\"\r\n"
                # distinct block parameter names keep the outer key/value intact
                file_headers.each_pair { |name, field| header << "#{name}: #{field}\r\n" }
                "#{header}\r\n#{content}\r\n"
              else
                %Q{Content-Disposition: form-data; name="#{key.to_s}"\r\n\r\n#{value.to_s}\r\n}
              end
            end
          end

          parts.collect { |part| "--#{boundary}\r\n#{part}" }.join('') + "--#{boundary}--\r\n"
        end

        protected

        # Helper to headerize the keys of a hash to headers.
        # See Utils#headerize.
        def headerize_keys(hash) # :nodoc:
          result = {}
          hash.each_pair do |key, value|
            result[Utils.headerize(key)] = value
          end
          result
        end

        # Helper returning an array of unencoded "key=value" strings, one
        # per value of each parameter.
        def query_pairs(params) # :nodoc:
          pairs = []
          params.each_pair do |key, values|
            values = [values] unless values.kind_of?(Array)
            values.each { |value| pairs << "#{key}=#{value}" }
          end
          pairs
        end

        # Helper to percent-encode str in the manner of URI.encode.
        # URI.encode was removed in Ruby 3.0, so the default parser is
        # used where it is available.
        def escape_uri(str) # :nodoc:
          if defined?(URI::DEFAULT_PARSER)
            URI::DEFAULT_PARSER.escape(str)
          else
            URI.encode(str)
          end
        end
      end

      config :url, nil                                  # the target url
      config :headers, {}, &c.hash                      # a hash of request headers
      config :params, {}, &c.hash                       # a hash of query parameters
      config :request_method, 'GET'                     # the request method (get or post)
      config :version, 1.1                              # the HTTP version
      config :redirection_limit, nil, &c.integer_or_nil # the redirection limit for the request

      config :max_threads, 10, &c.integer               # the maximum number of request threads

      # Prepares the request_hash by symbolizing keys and adding missing
      # parameters using the current configuration values.  Returns a new
      # hash; request_hash is not modified.
      def prepare(request_hash)
        defaults = {
          :url => url,
          :headers => headers,
          :params => params,
          :request_method => request_method,
          :version => version,
          :redirection_limit => redirection_limit
        }

        request_hash.inject(defaults) do |options, (key, value)|
          # keys that cannot be symbolized are used as they are
          symbol = begin
            key.to_sym
          rescue StandardError
            key
          end

          options[symbol || key] = value
          options
        end
      end

      # Prepares and submits each request, using up to max_threads threads,
      # then passes each response to process_response.  Returns an array of
      # the process_response results, in the order of the requests.
      #
      # ResponseErrors raised by process_response are collected (leaving nil
      # as the result for that request) and passed to handle_response_errors
      # once all responses have been processed.
      def process(*requests)
        # build a queue of all the requests to be handled
        queue = Queue.new
        requests.each_with_index do |request, index|
          queue.enq [prepare(request), index]
        end

        # submit and retrieve all requests before processing
        # responses.  this assures responses are processed
        # in order, in case it matters.
        lock = Mutex.new
        responses = []

        # there is no point starting more threads than there are requests
        thread_count = [max_threads, requests.length].min
        request_threads = Array.new(thread_count) do
          Thread.new do
            begin
              until queue.empty?
                request, index = queue.deq(true)
                log(request[:request_method], request[:url])

                res = Dispatch.submit(request)
                lock.synchronize { responses[index] = res }
              end
            rescue ThreadError => error
              # Another thread may take the last request between the
              # empty? check and the non-blocking deq; that simply
              # means there is no more work for this thread.
              raise error unless error.message == 'queue empty'
            end
          end
        end
        request_threads.each { |thread| thread.join }

        # process responses and collect results
        errors = []
        results = []
        responses.each_with_index do |res, index|
          results << begin
            process_response(res)
          rescue ResponseError => error
            errors << [error, index]
            nil
          end
        end

        handle_response_errors(results, errors) unless errors.empty?
        results
      end

      # Hook for processing a response.  By default process_response
      # simply logs the response message and returns the response.
      def process_response(res)
        log(nil, res.message)
        res
      end

      # A hook for handling a batch of response errors, perhaps
      # doing something meaningful with the successful responses.
      # By default, concatenates the error messages and raises
      # a new ResponseError.
      #
      # Errors is an array of [error, request_index] pairs.
      def handle_response_errors(responses, errors)
        errors.collect! { |error, n| "request #{n}: #{error.message}" }
        errors.unshift("Error processing responses:")
        raise ResponseError, errors.join("\n")
      end

      # Raised to signal a problem processing a response.
      class ResponseError < StandardError
      end
    end
  end
end