autoload(:WEBrick, 'webrick')
autoload(:Zlib, 'zlib')
autoload(:StringIO, 'stringio')

module Tap
  module Http
    # Stateless helpers for converting HTTP requests (WEBrick requests, raw
    # request text, or a CGI environment) into plain hashes, plus a few small
    # utilities for normalizing header names and values.  All methods are
    # exposed as module functions.
    module Utils
      module_function

      # Parses a WEBrick::HTTPRequest from the input socket into a hash that
      # may be resubmitted by Dispatch.  Sockets can be any kind of IO (File,
      # StringIO, etc..) and should be positioned such that the next line is
      # the start of an HTTP request.  Strings used as sockets are converted
      # into StringIO objects.
      #
      #   parse_http_request("GET /path HTTP/1.1\n")
      #   # => {
      #   # :request_method => "GET",
      #   # :url => "/path",
      #   # :version => "1.1",
      #   # :headers => {},
      #   # :params => {},
      #   # }
      #
      # If splat_values is specified, single-value headers and parameters
      # will be hashed as single values.  Otherwise, all header and parameter
      # values will be arrays.
      #
      #   str = "GET /path?one=a&one=b&two=c HTTP/1.1\n"
      #   req = parse_http_request(str)
      #   req[:params]    # => {'one' => ['a', 'b'], 'two' => 'c'}
      #
      #   req = parse_http_request(str, false)
      #   req[:params]    # => {'one' => ['a', 'b'], 'two' => ['c']}
      #
      # ==== WEBrick parsing of HTTP format
      #
      # WEBrick will parse headers then the body of a request, and currently
      # (1.8.6) considers an empty line as a break between the headers and
      # body.  In general header parsing is forgiving with end-line
      # characters (ie "\r\n" and "\n" are both acceptable) but parsing of
      # multipart/form data IS NOT.
      #
      # Multipart/form data REQUIRES that the end-line characters are "\r\n".
      # A boundary is always started with "--" and the last boundary completed
      # with "--".  As always, the content-length must be correct.
      #
      #   # Notice an empty line between the last header
      #   # (in this case 'Content-Length') and the body.
      #   msg = <<-_end_of_message_
      #   POST /path HTTP/1.1
      #   Host: localhost:8080
      #   Content-Type: multipart/form-data; boundary=1234567890
      #   Content-Length: 158
      #
      #   --1234567890
      #   Content-Disposition: form-data; name="one"
      #
      #   value one
      #   --1234567890
      #   Content-Disposition: form-data; name="two"
      #
      #   value two
      #   --1234567890--
      #   _end_of_message_
      #
      #   # ensure the end of line characters are correct...
      #   socket = StringIO.new msg.gsub(/\n/, "\r\n")
      #
      #   Tap::Http::Utils.parse_http_request(socket)
      #   # => {
      #   # :request_method => "POST",
      #   # :url => "http://localhost:8080/path",
      #   # :version => "1.1",
      #   # :headers => {
      #   #   "Host" => "localhost:8080",
      #   #   "Content-Type" => "multipart/form-data; boundary=1234567890",
      #   #   "Content-Length" => "158"},
      #   # :params => {
      #   #   "one" => "value one",
      #   #   "two" => "value two"}}
      #
      #--
      # TODO: check if there are other headers to capture from
      # a multipart/form file.  Currently only
      # 'Filename' and 'Content-Type' are added
      def parse_http_request(socket, splat_values=true)
        socket = StringIO.new(socket) if socket.kind_of?(String)

        req = WEBrick::HTTPRequest.new(WEBrick::Config::HTTP)
        req.parse(socket)

        parse_webrick_request(req, splat_values)
      end

      # Parses a WEBrick::HTTPRequest, with the same activity as
      # parse_http_request.
      #
      # Returns a hash with the keys :url, :request_method, :version,
      # :headers and :params.  The url is prefixed with "http://<Host>" only
      # when the request carries a Host header; otherwise it is the bare
      # path_info.
      def parse_webrick_request(req, splat_values=true)
        headers = {}
        if req.header
          req.header.each_pair do |key, values|
            headers[headerize(key)] = splat_values ? splat(values) : values
          end
        end

        params = {}
        if req.query
          req.query.each_pair do |key, value|
            # In tests value is (always?) a WEBrick::HTTPUtils::FormData, and
            # each data it yields is likewise a FormData.  File uploads are
            # recognized by a non-nil filename and are summarized by their
            # filename and content type rather than their content.
            values = []
            value.each_data do |data|
              values << if data.filename
                {'Filename' => data.filename, 'Content-Type' => data['Content-Type']}
              else
                data.to_s
              end
            end

            params[key] = splat_values ? splat(values) : values
          end
        end

        host = headers['Host']
        {
          :url => host ? File.join("http://", host, req.path_info) : req.path_info,
          :request_method => req.request_method,
          :version => req.http_version.to_s,
          :headers => headers,
          :params => params
        }
      end

      # Parses the input CGI into a hash that may be resubmitted by Dispatch.
      # To work properly, the standard CGI environmental variables must be
      # set in ENV.
      #
      # Headers are collected from ENV: every HTTP_* variable except
      # HTTP_VERSION, plus CONTENT_TYPE.  Parameters are read from
      # cgi.params; values that respond to read and have a non-empty
      # original_filename are summarized as a 'Filename'/'Content-Type' hash,
      # other readable values are replaced by their content.
      #
      # The url is prefixed with "http://<Host>" only when a Host header is
      # present, and a missing PATH_INFO is treated as an empty path, so an
      # incomplete environment no longer raises a TypeError.  Note that
      # :version is a Float (ex 1.1) when HTTP_VERSION looks like
      # "HTTP/<number>", and the raw ENV value (possibly nil) otherwise.
      def parse_cgi_request(cgi, splat_values=true)
        headers = {}
        ENV.each_pair do |key, value|
          name =
            case key
            when "HTTP_VERSION" then nil
            when /^HTTP_(.*)/   then Regexp.last_match(1)
            when 'CONTENT_TYPE' then key
            end
          next if name.nil?

          headers[headerize(name)] = splat_values ? splat(value) : value
        end

        params = {}
        cgi.params.each_pair do |key, values|
          values = values.collect do |value|
            if !value.respond_to?(:read)
              value
            elsif value.original_filename.empty?
              value.read
            else
              {'Filename' => value.original_filename, 'Content-Type' => value.content_type}
            end
          end

          params[key] = splat_values ? splat(values) : values
        end

        host = headers['Host']
        path = ENV['PATH_INFO'].to_s

        version = ENV['HTTP_VERSION']
        if version && version =~ /^HTTP\/(.*)$/
          version = Regexp.last_match(1).to_f
        end

        {
          :url => host ? File.join("http://", host, path) : path,
          :request_method => ENV['REQUEST_METHOD'],
          :version => version,
          :headers => headers,
          :params => params
        }
      end

      # Returns action unchanged when it starts with "http:" or "https:".
      # Otherwise returns action joined onto File.basename(referer).
      #
      # NOTE(review): using the basename of the referer as the base looks
      # unintended (the original carried an "only use host of page_url"
      # remark on the branch for actions containing "/", yet both branches
      # performed the same join).  Behavior is preserved here; confirm the
      # intended resolution rules before changing it.
      def determine_url(action, referer)
        base = File.basename(referer)
        return action if action =~ /^https?:/

        File.join(base, action)
      end

      # Headerizes an underscored string.  The input is converted to
      # a string using to_s.  Whitespace, dashes and underscores all act as
      # word separators; runs of separators and leading separators are
      # collapsed rather than producing empty words.
      #
      #   headerize('SOME_STRING')    # => 'Some-String'
      #   headerize('some string')    # => 'Some-String'
      #   headerize('Some-String')    # => 'Some-String'
      #   headerize('some__string')   # => 'Some-String'
      #
      def headerize(str)
        words = str.to_s.split(/[\s_-]+/).reject(&:empty?)
        words.collect do |word|
          # upcase the first character, downcase the remainder
          word[0, 1].upcase + word[1..-1].downcase
        end.join("-")
      end

      # Returns the first member of arrays length <= 1, or the array in all
      # other cases.  Non-array inputs are returned unchanged.  Splat is
      # useful to simplify hashes of http headers and parameters that may
      # have multiple values, but typically only have one.
      #
      #   splat([])                   # => nil
      #   splat([:one])               # => :one
      #   splat([:one, :two])         # => [:one, :two]
      #
      def splat(array)
        return array unless array.kind_of?(Array)

        case array.length
        when 0 then nil
        when 1 then array.first
        else array
        end
      end

      # Inflates (ie unzips) a gzip string, as may be returned by requests
      # that accept 'gzip' content encoding.  The input must be in gzip
      # format; the reader is closed once the content has been read.
      #
      #--
      #   Utils.inflate(res.body) if res['content-encoding'] == 'gzip'
      #
      def inflate(str)
        Zlib::GzipReader.wrap(StringIO.new(str)) { |gz| gz.read }
      end
    end
  end
end