autoload(:WEBrick, 'webrick')
autoload(:StringIO, 'stringio')
require 'rack'

module Tap
module Mechanize
# Helpers that turn HTTP requests (raw sockets, WEBrick requests, Rack
# requests) into plain hashes.  All methods are module functions.
module Utils
module_function

# Parses a WEBrick::HTTPRequest from the input socket into a hash that
# may be resubmitted by Dispatch.  Sockets can be any kind of IO (File,
# StringIO, etc.) and should be positioned such that the next line is
# the start of an HTTP request.  Strings used as sockets are converted
# into StringIO objects.
#
#   parse_http_request("GET /path HTTP/1.1\n")
#   # => {
#   # :request_method => "GET",
#   # :uri => "/path",
#   # :version => "1.1",
#   # :headers => {},
#   # :params => {},
#   # }
#
# ==== WEBrick parsing of HTTP format
#
# WEBrick will parse headers then the body of a request, and currently
# (1.8.6) considers an empty line as a break between the headers and
# body.  In general header parsing is forgiving with end-line
# characters (i.e. "\r\n" and "\n" are both acceptable) but parsing of
# multipart/form data IS NOT.
#
# Multipart/form data REQUIRES that the end-line characters are "\r\n".
# A boundary is always started with "--" and the last boundary completed
# with "--".  As always, the content-length must be correct.
#
#   # Notice an empty line between the last header
#   # (in this case 'Content-Length') and the body.
#   msg = <<-_end_of_message_
#   POST /path HTTP/1.1
#   Host: localhost:8080
#   Content-Type: multipart/form-data; boundary=1234567890
#   Content-Length: 158
#
#   --1234567890
#   Content-Disposition: form-data; name="one"
#
#   value one
#   --1234567890
#   Content-Disposition: form-data; name="two"
#
#   value two
#   --1234567890--
#   _end_of_message_
#
#   # ensure the end of line characters are correct...
#   socket = StringIO.new msg.gsub(/\n/, "\r\n")
#
#   Tap::Mechanize::Utils.parse_http_request(socket)
#   # => {
#   # :request_method => "POST",
#   # :uri => "http://localhost:8080/path",
#   # :version => "1.1",
#   # :headers => {
#   #   "Host" => "localhost:8080",
#   #   "Content-Type" => "multipart/form-data; boundary=1234567890",
#   #   "Content-Length" => "158"},
#   # :params => {
#   #   "one" => "value one",
#   #   "two" => "value two"}}
#
# When keep_content is false the 'Content' of uploaded files is left out
# of the params.
#
#--
# TODO: check if there are other headers to capture from
# a multipart/form file.  Currently only
# 'Filename' and 'Content-Type' are added
def parse_http_request(socket, keep_content=true)
  socket = StringIO.new(socket) if socket.kind_of?(String)

  req = WEBrick::HTTPRequest.new(WEBrick::Config::HTTP)
  req.parse(socket)

  parse_webrick_request(req, keep_content)
end

# Parses a WEBrick::HTTPRequest, with the same activity as
# parse_http_request.
#
# Returns a hash with the keys :uri, :request_method, :version, :headers
# and :params.  The :uri is "http://<Host><path_info>" when a Host header
# is present and the bare path_info otherwise.  Header and param values
# are passed through splat, so single values are not wrapped in an array.
#
# A param that carries a filename is reported as a hash with 'Filename'
# and 'Content-Type' entries, plus 'Content' when keep_content is true.
def parse_webrick_request(req, keep_content=true)
  headers = {}
  if req.header
    req.header.each_pair do |key, values|
      headers[headerize(key)] = splat(values)
    end
  end

  params = {}
  if req.query
    req.query.each_pair do |key, value|
      # no sense for how robust this is...
      # In tests value is (always?) a WEBrick::HTTPUtils::FormData.  Each
      # data is likewise a FormData.  If FormData is a file, it has a
      # filename and you have to try [] to get the content-type.
      values = []
      value.each_data do |data|
        if data.filename
          file = {'Filename' => data.filename, 'Content-Type' => data['Content-Type']}
          # The data is itself the content.  The previous to_a.join("\n")
          # relied on String#to_a, which no longer exists (Ruby >= 1.9)
          # and doubled the newlines of multi-line content on 1.8.
          file['Content'] = data.to_s if keep_content
          values << file
        else
          values << data.to_s
        end
      end

      params[key] = splat(values)
    end
  end

  host = headers['Host']

  { :uri => host ? File.join("http://", host, req.path_info) : req.path_info,
    :request_method => req.request_method,
    :version => req.http_version.to_s,
    :headers => headers,
    :params => params}
end

# Parses a Rack::Request, with the same activity as parse_http_request.
#
# Headers are taken from the HTTP_* entries of the rack env (HTTP_VERSION
# excluded) plus CONTENT_TYPE.  The :uri is "http://<Host><PATH_INFO>"
# when a Host header is present and the bare PATH_INFO otherwise.  Note
# that :version is a Float here (ex 1.1) when HTTP_VERSION looks like
# "HTTP/x.y".
#
# Uploaded files (hashes with a :tempfile entry) are reported as hashes
# with 'Content-Type' and 'Filename' entries, plus 'Content' when
# keep_content is true.  All other param values are passed through as-is.
def parse_rack_request(request, keep_content=true)
  headers = {}
  request.env.each_pair do |key, value|
    name = case key
           when "HTTP_VERSION" then nil
           when /^HTTP_(.*)/   then $1
           when 'CONTENT_TYPE' then key
           end

    headers[headerize(name)] = value if name
  end

  params = {}
  request.params.each_pair do |key, value|
    params[key] = each_member(value) do |obj|
      # Only hashes carrying a :tempfile are uploads; nested params such
      # as user[name]=x are hashes too and must not be read as files.
      if obj.kind_of?(Hash) && obj.key?(:tempfile)
        file = {'Content-Type' => obj[:type], 'Filename' => obj[:filename]}
        file['Content'] = obj[:tempfile].read if keep_content
        file
      else
        obj
      end
    end
  end

  # Requests without a Host header fall back to the bare path, the same
  # way parse_webrick_request does, rather than failing in File.join.
  host = headers['Host']
  path = request.env['PATH_INFO']

  version = request.env['HTTP_VERSION']
  version = $1.to_f if version =~ /^HTTP\/(.*)$/

  { :uri => host ? File.join("http://", host, path) : path,
    :request_method => request.request_method,
    :version => version,
    :headers => headers,
    :params => params
  }
end

# Yields each member of an input array to the block and collects the
# result.  If obj is not an array, the value is simply yielded to the
# block.
def each_member(obj)
  if obj.kind_of?(Array)
    obj.collect {|value| yield(value) }
  else
    yield(obj)
  end
end

# Returns the first member of arrays length <= 1, or the array in all
# other cases.  Non-array inputs are returned unchanged.  Splat is useful
# to simplify hashes of http headers and parameters that may have
# multiple values, but typically only have one.
#
#   splat([])               # => nil
#   splat([:one])           # => :one
#   splat([:one, :two])     # => [:one, :two]
#
def splat(array)
  return array unless array.kind_of?(Array)

  case array.length
  when 0 then nil
  when 1 then array.first
  else array
  end
end

# Headerizes an underscored string.  The input is converted to a string
# using to_s.  Whitespace, dashes and underscores all act as separators;
# leading, trailing and repeated separators are ignored.
#
#   headerize('SOME_STRING')   # => 'Some-String'
#   headerize('some string')   # => 'Some-String'
#   headerize('Some-String')   # => 'Some-String'
#   headerize('some__string')  # => 'Some-String'
#
def headerize(str)
  parts = str.to_s.gsub(/\s|-/, "_").split("_")
  # Empty parts come from leading/doubled separators and used to raise.
  parts = parts.reject {|part| part.empty? }
  parts.collect {|part| part.capitalize }.join("-")
end

# The multipart end-of-line sequence.  Spelled out rather than read from
# Rack::Utils::Multipart::EOL, a Rack-internal constant that has moved in
# later Rack releases.
EOL = "\r\n".freeze

# Lifted from Rack::Utils::Multipart, and modified to collect
# overloaded params and params with names suffixed by '[]' as
# arrays.
#
# Reads the multipart/form-data body from env['rack.input'] and returns a
# hash of params keyed by part name, or nil when env['CONTENT_TYPE'] is
# not multipart/form-data with a boundary.
#
# Parts with a filename are returned as a hash with the keys :filename,
# :type, :name, :tempfile and :head; other parts are returned as the body
# string.  Raises EOFError ("bad content body") when the body does not
# begin with the boundary or the input runs out before a part completes.
#
# NOTE(review): Tempfile is used below but 'tempfile' is not required by
# the visible file; presumably it is loaded by rack -- confirm.
def parse_multipart(env)
  unless env['CONTENT_TYPE'] =~ %r|\Amultipart/form-data.*boundary=\"?([^\";,]+)\"?|n
    nil
  else
    # $1 is the boundary token captured from the content type
    boundary = "--#{$1}"

    params = {}
    buf = ""
    content_length = env['CONTENT_LENGTH'].to_i
    input = env['rack.input']

    # size of the opening "--boundary" line including its EOL
    boundary_size = boundary.size + EOL.size
    bufsize = 16384

    content_length -= boundary_size

    # the body must start with the boundary line
    status = input.read(boundary_size)
    raise EOFError, "bad content body" unless status == boundary + EOL

    # matches a boundary, optionally preceded by an EOL; the capture is
    # EOL for a boundary between parts and "--" for the closing boundary
    rx = /(?:#{EOL})?#{Regexp.quote boundary}(#{EOL}|--)/

    # each pass of the loop consumes one part
    loop {
      head = nil
      body = ''
      filename = content_type = name = nil

      # read until the part head has been parsed and the next boundary
      # is in the buffer
      until head && buf =~ rx
        if !head && i = buf.index("\r\n\r\n")
          head = buf.slice!(0, i+2) # First \r\n
          buf.slice!(0, 2)          # Second \r\n

          filename = head[/Content-Disposition:.* filename="?([^\";]*)"?/ni, 1]
          content_type = head[/Content-Type: (.*)\r\n/ni, 1]
          name = head[/Content-Disposition:.* name="?([^\";]*)"?/ni, 1]

          # file parts are collected in a tempfile rather than a string
          if filename
            body = Tempfile.new("RackMultipart")
            body.binmode if body.respond_to?(:binmode)
          end

          # re-test the loop condition before reading more input
          next
        end

        # Save the read body part.  The last boundary_size+4 bytes stay
        # in buf -- presumably so a boundary split across two reads is
        # never flushed into the body.
        if head && (boundary_size+4 < buf.size)
          body << buf.slice!(0, buf.size - (boundary_size+4))
        end

        c = input.read(bufsize < content_length ? bufsize : content_length)
        raise EOFError, "bad content body" if c.nil? || c.empty?
        buf << c
        content_length -= c.size
      end

      # Save the rest.
      if i = buf.index(rx)
        body << buf.slice!(0, i)
        # drop boundary_size+2 bytes: the length of EOL + boundary +
        # (EOL or "--")
        buf.slice!(0, boundary_size+2)

        # $1 is the rx capture; "--" marks the closing boundary and -1
        # is the sentinel that ends the loop below
        content_length = -1 if $1 == "--"
      end

      if filename
        body.rewind
        data = {:filename => filename, :type => content_type, :name => name, :tempfile => body, :head => head}
      else
        data = body
      end

      # parts without a name are dropped; repeated names, and names
      # ending in '[]', are collected into arrays
      if name
        case current = params[name]
        when nil
          params[name] = (name =~ /\[\]\z/ ? [data] : data)
        when Array
          params[name] << data
        else
          params[name] = [current, data]
        end
      end

      break if buf.empty? || content_length == -1
    }

    params
  end
end
end # module Utils
end # module Mechanize
end # module Tap