lib/tap/http/dispatch.rb in tap-http-0.1.0 vs lib/tap/http/dispatch.rb in tap-http-0.2.0

- old
+ new

require 'tap/http/utils'
require 'net/http'
require 'thread'

module Tap
  module Http

    # :startdoc::manifest submits an http request
    #
    # Dispatch is a base class for submitting HTTP requests from a request
    # hash. Multiple requests may be submitted on individual threads, up
    # to a configurable limit.
    #
    # Request hashes are like the following:
    #
    #   request_method: GET
    #   url: http://tap.rubyforge.org/
    #   headers: {}
    #   params: {}
    #   version: 1.1
    #
    # Missing fields are added from the task configuration. Note that since
    # Dispatch takes hash inputs, it is often convenient to save requests in
    # a .yml file and sequence dispatch with load:
    #
    #   [requests.yml]
    #   - url: http://tap.rubyforge.org/
    #   - url: http://tap.rubyforge.org/about.html
    #
    #   % rap load requests.yml --:i dispatch --+ dump
    #
    # :startdoc::manifest-end
    # === Dispatch Methods
    #
    # Dispatch itself provides methods for constructing and submitting get and
    # post HTTP requests from a request hash.
    #
    #   res = Tap::Http::Dispatch.submit(
    #     :url => "http://tap.rubyforge.org",
    #     :version => '1.1',
    #     :request_method => 'GET',
    #     :headers => {},
    #     :params => {}
    #   )
    #   res.inspect                            # => "#<Net::HTTPOK 200 OK readbody=true>"
    #   res.body =~ /Tap/                      # => true
    #
    # Headers and parameters take the form:
    #
    #   {
    #     'single' => 'value',
    #     'multiple' => ['value one', 'value two']
    #   }
    #
    # To capture request hashes from web forms using Firefox, see the README.
    class Dispatch < Tap::Task
      class << self

        # Builds a new instance from args, extends it with
        # Support::Intern(:process_response) and stores the block as the
        # instance's process_response_block. Returns the instance.
        #
        # NOTE(review): presumably this makes the block stand in for
        # process_response -- confirm against Tap::Support::Intern.
        def intern(*args, &block)
          instance = new(*args)
          instance.extend Support::Intern(:process_response)
          instance.process_response_block = block
          instance
        end

        # Constructs and submits an http request to the url using the request hash.
        # Request hashes are like this:
        #
        #   {
        #     :url => "http://tap.rubyforge.org",
        #     :version => '1.1',
        #     :request_method => 'GET',
        #     :headers => {},
        #     :params => {}
        #   }
        #
        # If left unspecified, the default configuration values will be used (but
        # note that since the default url is nil, a url MUST be specified).
        # Headers and parameters can use array values to specify multiple values
        # for the same key.
        #
        # Submit only supports get and post request methods; see construct_get and
        # construct_post for more details. A block may be given to receive the
        # Net::HTTP and request just prior to submission.
        #
        # Raises an ArgumentError if no url is specified, or if the request
        # method or HTTP version is unsupported.
        #
        # Returns the Net::HTTP response. If :redirection_limit is set in the
        # request hash, the response is first passed through fetch_redirection.
        #
        def submit(request_hash)
          url_or_uri = request_hash[:url] || configurations[:url].default
          headers = request_hash[:headers] || configurations[:headers].default
          params = request_hash[:params] || configurations[:params].default
          request_method = request_hash[:request_method] || configurations[:request_method].default
          version = request_hash[:version] || configurations[:version].default

          raise ArgumentError, "no url specified" unless url_or_uri
          uri = url_or_uri.kind_of?(URI) ? url_or_uri : URI.parse(url_or_uri)
          uri.path = "/" if uri.path.empty?

          # construct the request based on the method
          request = case request_method.to_s
          when /^get$/i then construct_get(uri, headers, params)
          when /^post$/i then construct_post(uri, headers, params)
          else
            raise ArgumentError, "unsupported request method: #{request_method}"
          end

          # set the http version (e.g. 1.1 dispatches to Net::HTTP.version_1_1)
          version_method = "version_#{version.to_s.gsub(".", "_")}".to_sym
          if ::Net::HTTP.respond_to?(version_method)
            ::Net::HTTP.send(version_method)
          else
            raise ArgumentError, "unsupported HTTP version: #{version}"
          end

          # submit the request
          res = ::Net::HTTP.new(uri.host, uri.port).start do |http|
            yield(http, request) if block_given?
            http.request(request)
          end

          # fetch redirections
          redirection_limit = request_hash[:redirection_limit]
          redirection_limit ? fetch_redirection(res, redirection_limit) : res
        end

        # Constructs a Net::HTTP::Post query, setting headers and parameters.
        #
        # ==== Supported Content Types:
        #
        # - application/x-www-form-urlencoded (the default)
        # - multipart/form-data
        #
        # The multipart/form-data content type may specify a boundary. If no
        # boundary is specified, a randomly generated boundary will be used
        # to delimit the parameters.
        #
        #   post = construct_post(
        #     URI.parse('http://some.url/'),
        #     {:content_type => 'multipart/form-data; boundary=1234'},
        #     {:key => 'value'})
        #
        #   post.body
        #   # => %Q{--1234\r
        #   # Content-Disposition: form-data; name="key"\r
        #   # \r
        #   # value\r
        #   # --1234--\r
        #   # }
        #
        # (Note the carriage returns are required in multipart content)
        #
        # The content-length header is determined automatically from the
        # formatted request body; manually specified content-length headers
        # will be overridden.
        #
        # Raises an ArgumentError for any other Content-Type.
        #
        def construct_post(uri, headers, params)
          # NOTE(review): URI.encode was removed in Ruby 3.0; kept to preserve
          # the escaping behavior on the Ruby versions this library targets.
          req = ::Net::HTTP::Post.new( URI.encode("#{uri.path}#{format_query(uri)}") )
          headers = headerize_keys(headers)
          content_type = headers['Content-Type']

          case content_type
          when nil, /^application\/x-www-form-urlencoded$/i
            req.body = format_www_form_urlencoded(params)
            headers['Content-Type'] ||= "application/x-www-form-urlencoded"
            # NOTE(review): String#length counts characters on Ruby >= 1.9;
            # confirm bodies are single-byte or switch to a byte count.
            headers['Content-Length'] = req.body.length

          when /^multipart\/form-data(;\s*boundary=(.*))?$/i
            # extract the boundary if it exists
            boundary = $2 || rand.to_s[2..20]

            req.body = format_multipart_form_data(params, boundary)
            headers['Content-Type'] = "multipart/form-data; boundary=#{boundary}"
            headers['Content-Length'] = req.body.length

          else
            raise ArgumentError, "unsupported Content-Type for POST: #{content_type}"
          end

          headers.each_pair { |key, value| req[key] = value }
          req
        end

        # Constructs a Net::HTTP::Get query. All parameters in uri and params are
        # encoded and added to the request URI.
        #
        #   get = construct_get(URI.parse('http://some.url/path'), {}, {:key => 'value'})
        #   get.path         # => "/path?key=value"
        #
        def construct_get(uri, headers, params)
          req = ::Net::HTTP::Get.new( URI.encode("#{uri.path}#{format_query(uri, params)}") )
          headerize_keys(headers).each_pair { |key, value| req[key] = value }
          req
        end

        # Checks the type of the response; if it is a redirection, fetches the
        # redirection. Otherwise return the response.
        #
        # Notes:
        # - Fetch will recurse up to the input redirection limit (default 10)
        # - Responses that are not Net::HTTPRedirection or Net::HTTPSuccess
        #   raise an error.
        def fetch_redirection(res, limit=10)
          raise 'exceeded the redirection limit' if limit < 1

          case res
          when ::Net::HTTPRedirection
            redirect = ::Net::HTTP.get_response( URI.parse(res['location']) )
            fetch_redirection(redirect, limit - 1)
          when ::Net::HTTPSuccess
            res
          else
            # error! raises the exception appropriate to the response type
            res.error!
          end
        end

        # Constructs a URI query string from the uri and the input parameters.
        # Multiple values for a parameter may be specified using an array.
        # The query is not encoded, so you may need to URI.encode it later.
        #
        #   format_query(URI.parse('http://some.url/path'), {:key => 'value'})
        #   # => "?key=value"
        #
        #   format_query(URI.parse('http://some.url/path?one=1'), {:two => '2'})
        #   # => "?one=1&two=2"
        #
        def format_query(uri, params={})
          query = []
          query << uri.query if uri.query
          params.each_pair do |key, values|
            values = [values] unless values.kind_of?(Array)
            values.each { |value| query << "#{key}=#{value}" }
          end
          "#{query.empty? ? '' : '?'}#{query.join('&')}"
        end

        # Formats params as 'application/x-www-form-urlencoded' for use as the
        # body of a post request. Multiple values for a parameter may be
        # specified using an array. The result is URI encoded.
        #
        #   format_www_form_urlencoded(:key => 'value with spaces')
        #   # => "key=value%20with%20spaces"
        #
        def format_www_form_urlencoded(params={})
          query = []
          params.each_pair do |key, values|
            values = [values] unless values.kind_of?(Array)
            values.each { |value| query << "#{key}=#{value}" }
          end
          URI.encode( query.join('&') )
        end

        # Formats params as 'multipart/form-data' using the specified boundary,
        # for use as the body of a post request. Multiple values for a parameter
        # may be specified using an array. All newlines include a carriage
        # return for proper formatting.
        #
        #   format_multipart_form_data(:key => 'value')
        #   # => %Q{--1234567890\r
        #   # Content-Disposition: form-data; name="key"\r
        #   # \r
        #   # value\r
        #   # --1234567890--\r
        #   # }
        #
        # To specify a file, use a hash of file-related headers. The file
        # named by 'Filename' is read as the content if it exists; otherwise
        # the content is empty.
        #
        #   format_multipart_form_data(:key => {
        #     'Content-Type' => 'text/plain',
        #     'Filename' => "path/to/file.txt"}
        #   )
        #   # => %Q{--1234567890\r
        #   # Content-Disposition: form-data; name="key"; filename="path/to/file.txt"\r
        #   # Content-Type: text/plain\r
        #   # \r
        #   # \r
        #   # --1234567890--\r
        #   # }
        #
        def format_multipart_form_data(params, boundary="1234567890")
          body = []
          params.each_pair do |key, values|
            values = [values] unless values.kind_of?(Array)

            values.each do |value|
              body << case value
              when Hash
                hash = headerize_keys(value)
                filename = hash.delete('Filename') || ""
                content = File.exist?(filename) ? File.read(filename) : ""

                header = "Content-Disposition: form-data; name=\"#{key.to_s}\"; filename=\"#{filename}\"\r\n"
                # distinct names so the enclosing key/value are not shadowed
                hash.each_pair { |field, field_value| header << "#{field}: #{field_value}\r\n" }
                "#{header}\r\n#{content}\r\n"
              else
                %Q{Content-Disposition: form-data; name="#{key.to_s}"\r\n\r\n#{value.to_s}\r\n}
              end
            end
          end

          body.collect {|p| "--#{boundary}\r\n#{p}" }.join('') + "--#{boundary}--\r\n"
        end

        protected

        # Helper to headerize the keys of a hash to headers.
        # See Utils#headerize.
        def headerize_keys(hash) # :nodoc:
          result = {}
          hash.each_pair do |key, value|
            result[Utils.headerize(key)] = value
          end
          result
        end
      end

      config :url, nil                                      # the target url
      config :headers, {}, &c.hash                          # a hash of request headers
      config :params, {}, &c.hash                           # a hash of query parameters
      config :request_method, 'GET'                         # the request method (get or post)
      config :version, 1.1                                  # the HTTP version
      config :redirection_limit, nil, &c.integer_or_nil     # the redirection limit for the request

      config :max_threads, 10, &c.integer                   # the maximum number of request threads

      # Prepares the request_hash by symbolizing keys and adding missing
      # parameters using the current configuration values. Returns a new
      # hash; values in request_hash override the configured values.
      def prepare(request_hash)
        request_hash.inject(
          :url => url,
          :headers => headers,
          :params => params,
          :request_method => request_method,
          :version => version,
          :redirection_limit => redirection_limit
        ) do |options, (key, value)|
          # fall back to the original key when it cannot be symbolized
          # (to_sym raises or returns nil)
          options[(key.to_sym rescue key) || key] = value
          options
        end
      end

      # Prepares and submits each request hash, using max_threads worker
      # threads, then passes each response to process_response in request
      # order. ResponseErrors raised by process_response are collected and
      # handed to handle_response_errors once all responses are processed.
      #
      # Returns an array of the process_response results, in request order,
      # with nil in place of any response that raised a ResponseError.
      def process(*requests)
        # build a queue of all the requests to be handled, tagging each
        # with its position so responses can be stored in request order
        queue = Queue.new
        requests.each_with_index do |request, index|
          queue.enq [prepare(request), index]
        end

        # submit and retrieve all requests before processing
        # responses. this assures responses are processed
        # in order, in case it matters.
        lock = Mutex.new
        responses = []
        request_threads = Array.new(max_threads) do
          Thread.new do
            begin
              while !queue.empty?
                request, index = queue.deq(true)
                log(request[:request_method], request[:url])

                res = Dispatch.submit(request)
                lock.synchronize { responses[index] = res }
              end
            rescue ThreadError => e
              # Another worker may drain the queue between the empty? check
              # and the non-blocking deq; that just means this worker is done.
              raise unless e.message == 'queue empty'
            end
          end
        end
        request_threads.each {|thread| thread.join }

        # process responses and collect results; the iteration index is
        # the request number, so errors are attributed to the right request
        errors = []
        results = []
        responses.each_with_index do |res, index|
          results << begin
            process_response(res)
          rescue ResponseError => e
            errors << [e, index]
            nil
          end
        end

        unless errors.empty?
          handle_response_errors(results, errors)
        end

        results
      end

      # Hook for processing a response. By default process_response
      # simply logs the response message and returns the response.
      def process_response(res)
        log(nil, res.message)
        res
      end

      # A hook for handling a batch of response errors, perhaps
      # doing something meaningful with the successful responses.
      # By default, concatenates the error messages and raises
      # a new ResponseError.
      #
      # errors is an array of [error, request_index] pairs.
      def handle_response_errors(responses, errors)
        errors.collect! {|error, n| "request #{n}: #{error.message}"}
        errors.unshift("Error processing responses:")
        raise ResponseError, errors.join("\n")
      end

      # Raised to signal a failure while processing a response.
      class ResponseError < StandardError
      end
    end
  end
end