lib/rack/multipart/parser.rb in rack-2.2.10 vs lib/rack/multipart/parser.rb in rack-3.0.0.beta1

- old
+ new

@@ -1,27 +1,55 @@ # frozen_string_literal: true require 'strscan' +require_relative '../utils' + module Rack module Multipart class MultipartPartLimitError < Errno::EMFILE; end - class MultipartTotalPartLimitError < StandardError; end - class Parser - (require_relative '../core_ext/regexp'; using ::Rack::RegexpExtensions) if RUBY_VERSION < '2.4' + # Use specific error class when parsing multipart request + # that ends early. + class EmptyContentError < ::EOFError; end + # Base class for multipart exceptions that do not subclass from + # other exception classes for backwards compatibility. + class Error < StandardError; end + + EOL = "\r\n" + MULTIPART = %r|\Amultipart/.*boundary=\"?([^\";,]+)\"?|ni + TOKEN = /[^\s()<>,;:\\"\/\[\]?=]+/ + CONDISP = /Content-Disposition:\s*#{TOKEN}\s*/i + VALUE = /"(?:\\"|[^"])*"|#{TOKEN}/ + BROKEN = /^#{CONDISP}.*;\s*filename=(#{VALUE})/i + MULTIPART_CONTENT_TYPE = /Content-Type: (.*)#{EOL}/ni + MULTIPART_CONTENT_DISPOSITION = /Content-Disposition:.*;\s*name=(#{VALUE})/ni + MULTIPART_CONTENT_ID = /Content-ID:\s*([^#{EOL}]*)/ni + # Updated definitions from RFC 2231 + ATTRIBUTE_CHAR = %r{[^ \t\v\n\r)(><@,;:\\"/\[\]?='*%]} + ATTRIBUTE = /#{ATTRIBUTE_CHAR}+/ + SECTION = /\*[0-9]+/ + REGULAR_PARAMETER_NAME = /#{ATTRIBUTE}#{SECTION}?/ + REGULAR_PARAMETER = /(#{REGULAR_PARAMETER_NAME})=(#{VALUE})/ + EXTENDED_OTHER_NAME = /#{ATTRIBUTE}\*[1-9][0-9]*\*/ + EXTENDED_OTHER_VALUE = /%[0-9a-fA-F]{2}|#{ATTRIBUTE_CHAR}/ + EXTENDED_OTHER_PARAMETER = /(#{EXTENDED_OTHER_NAME})=(#{EXTENDED_OTHER_VALUE}*)/ + EXTENDED_INITIAL_NAME = /#{ATTRIBUTE}(?:\*0)?\*/ + EXTENDED_INITIAL_VALUE = /[a-zA-Z0-9\-]*'[a-zA-Z0-9\-]*'#{EXTENDED_OTHER_VALUE}*/ + EXTENDED_INITIAL_PARAMETER = /(#{EXTENDED_INITIAL_NAME})=(#{EXTENDED_INITIAL_VALUE})/ + EXTENDED_PARAMETER = /#{EXTENDED_INITIAL_PARAMETER}|#{EXTENDED_OTHER_PARAMETER}/ + DISPPARM = /;\s*(?:#{REGULAR_PARAMETER}|#{EXTENDED_PARAMETER})\s*/ + RFC2183 = /^#{CONDISP}(#{DISPPARM})+$/i + + class Parser BUFSIZE = 1_048_576 TEXT_PLAIN = "text/plain" TEMPFILE_FACTORY = lambda { |filename, content_type| - extension = ::File.extname(filename.gsub("\0", '%00'))[0, 129] - - Tempfile.new(["RackMultipart", extension]) + Tempfile.new(["RackMultipart", ::File.extname(filename.gsub("\0", '%00'))]) } - BOUNDARY_REGEX = /\A([^\n]*(?:\n|\Z))/ - class BoundedIO # :nodoc: def initialize(io, content_length) @io = io @content_length = content_length @cursor = 0 @@ -39,20 +67,16 @@ end if str @cursor += str.bytesize else - # Raise an error for mismatching Content-Length and actual contents + # Raise an error for mismatching content-length and actual contents raise EOFError, "bad content body" end str end - - def rewind - @io.rewind - end end MultipartInfo = Struct.new :params, :tmp_files EMPTY = MultipartInfo.new(nil, []) @@ -67,22 +91,21 @@ return EMPTY if 0 == content_length boundary = parse_boundary content_type return EMPTY unless boundary + if boundary.length > 70 + # RFC 1521 Section 7.2.1 imposes a 70 character maximum for the boundary. + # Most clients use no more than 55 characters. + raise Error, "multipart boundary size too large (#{boundary.length} characters)" + end + io = BoundedIO.new(io, content_length) if content_length - outbuf = String.new parser = new(boundary, tmpfile, bufsize, qp) - parser.on_read io.read(bufsize, outbuf) + parser.parse(io) - loop do - break if parser.state == :DONE - parser.on_read io.read(bufsize, outbuf) - end - - io.rewind parser.result end class Collector class MimePart < Struct.new(:body, :head, :filename, :content_type, :name) @@ -141,11 +164,11 @@ klass = BufferPart end @mime_parts[mime_index] = klass.new(body, head, filename, content_type, name) - check_part_limits + check_open_files end def on_mime_body(mime_index, content) @mime_parts[mime_index].body << content end @@ -153,97 +176,104 @@ def on_mime_finish(mime_index) end private - def check_part_limits - file_limit = Utils.multipart_file_limit - part_limit = Utils.multipart_total_part_limit - - if file_limit && file_limit > 0 - if @open_files >= file_limit + def check_open_files + if Utils.multipart_part_limit > 0 + if @open_files >= Utils.multipart_part_limit @mime_parts.each(&:close) raise MultipartPartLimitError, 'Maximum file multiparts in content reached' end end - - if part_limit && part_limit > 0 - if @mime_parts.size >= part_limit - @mime_parts.each(&:close) - raise MultipartTotalPartLimitError, 'Maximum total multiparts in content reached' - end - end end end attr_reader :state def initialize(boundary, tempfile, bufsize, query_parser) @query_parser = query_parser @params = query_parser.make_params - @boundary = "--#{boundary}" @bufsize = bufsize - @full_boundary = @boundary - @end_boundary = @boundary + '--' @state = :FAST_FORWARD @mime_index = 0 @collector = Collector.new tempfile @sbuf = StringScanner.new("".dup) - @body_regex = /(?:#{EOL})?#{Regexp.quote(@boundary)}(?:#{EOL}|--)/m - @end_boundary_size = boundary.bytesize + 6 # (-- at start, -- at finish, EOL at end) - @rx_max_size = EOL.size + @boundary.bytesize + [EOL.size, '--'.size].max + @body_regex = /(?:#{EOL}|\A)--#{Regexp.quote(boundary)}(?:#{EOL}|--)/m + @rx_max_size = boundary.bytesize + 6 # (\r\n-- at start, either \r\n or -- at finish) @head_regex = /(.*?#{EOL})#{EOL}/m end - def on_read(content) - handle_empty_content!(content) - @sbuf.concat content - run_parser + def parse(io) + outbuf = String.new + read_data(io, outbuf) + + loop do + status = + case @state + when :FAST_FORWARD + handle_fast_forward + when :CONSUME_TOKEN + handle_consume_token + when :MIME_HEAD + handle_mime_head + when :MIME_BODY + handle_mime_body + else # when :DONE + return + end + + read_data(io, outbuf) if status == :want_read + end end def result @collector.each do |part| part.get_data do |data| tag_multipart_encoding(part.filename, part.content_type, part.name, data) - @query_parser.normalize_params(@params, part.name, data, @query_parser.param_depth_limit) + @query_parser.normalize_params(@params, part.name, data) end end MultipartInfo.new @params.to_params_hash, @collector.find_all(&:file?).map(&:body) end private - def run_parser - loop do - case @state - when :FAST_FORWARD - break if handle_fast_forward == :want_read - when :CONSUME_TOKEN - break if handle_consume_token == :want_read - when :MIME_HEAD - break if handle_mime_head == :want_read - when :MIME_BODY - break if handle_mime_body == :want_read - when :DONE - break - end - end + def dequote(str) # From WEBrick::HTTPUtils + ret = (/\A"(.*)"\Z/ =~ str) ? $1 : str.dup + ret.gsub!(/\\(.)/, "\\1") + ret end - def handle_fast_forward - tok = consume_boundary + def read_data(io, outbuf) + content = io.read(@bufsize, outbuf) + handle_empty_content!(content) + @sbuf.concat(content) + end - if tok == :END_BOUNDARY && @sbuf.pos == @end_boundary_size && @sbuf.eos? - # stop parsing a buffer if a buffer is only an end boundary. - @state = :DONE - elsif tok - @state = :MIME_HEAD - else - raise EOFError, "bad content body" if @sbuf.rest_size >= @bufsize - :want_read + # This handles the initial parser state. We read until we find the starting + # boundary, then we can transition to the next state. If we find the ending + # boundary, this is an invalid multipart upload, but keep scanning for opening + # boundary in that case. If no boundary found, we need to keep reading data + # and retry. It's highly unlikely the initial read will not consume the + # boundary. The client would have to deliberately craft a response + # with the opening boundary beyond the buffer size for that to happen. + def handle_fast_forward + while true + case consume_boundary + when :BOUNDARY + # found opening boundary, transition to next state + @state = :MIME_HEAD + return + when :END_BOUNDARY + # invalid multipart upload, but retry for opening boundary + else + # no boundary found, keep reading data + return :want_read + end end end def handle_consume_token tok = consume_boundary @@ -258,11 +288,11 @@ def handle_mime_head if @sbuf.scan_until(@head_regex) head = @sbuf[1] content_type = head[MULTIPART_CONTENT_TYPE, 1] if name = head[MULTIPART_CONTENT_DISPOSITION, 1] - name = Rack::Auth::Digest::Params::dequote(name) + name = dequote(name) else name = head[MULTIPART_CONTENT_ID, 1] end filename = get_filename(head) @@ -295,32 +325,33 @@ end :want_read end end - def full_boundary; @full_boundary; end - + # Scan until the we find the start or end of the boundary. + # If we find it, return the appropriate symbol for the start or + # end of the boundary. If we don't find the start or end of the + # boundary, clear the buffer and return nil. def consume_boundary - while read_buffer = @sbuf.scan_until(BOUNDARY_REGEX) - case read_buffer.strip - when full_boundary then return :BOUNDARY - when @end_boundary then return :END_BOUNDARY - end - return if @sbuf.eos? + if read_buffer = @sbuf.scan_until(@body_regex) + read_buffer.end_with?(EOL) ? :BOUNDARY : :END_BOUNDARY + else + @sbuf.terminate + nil end end def get_filename(head) filename = nil case head when RFC2183 params = Hash[*head.scan(DISPPARM).flat_map(&:compact)] - if filename = params['filename'] - filename = $1 if filename =~ /^"(.*)"$/ - elsif filename = params['filename*'] + if filename = params['filename*'] encoding, _, filename = filename.split("'", 3) + elsif filename = params['filename'] + filename = $1 if filename =~ /^"(.*)"$/ end when BROKEN filename = $1 filename = $1 if filename =~ /^"(.*)"$/ end @@ -343,10 +374,11 @@ filename end CHARSET = "charset" + deprecate_constant :CHARSET def tag_multipart_encoding(filename, content_type, name, body) name = name.to_s encoding = Encoding::UTF_8 @@ -363,21 +395,27 @@ rest.each do |param| k, v = param.split('=', 2) k.strip! v.strip! v = v[1..-2] if v.start_with?('"') && v.end_with?('"') - encoding = Encoding.find v if k == CHARSET + if k == "charset" + encoding = begin + Encoding.find v + rescue ArgumentError + Encoding::BINARY + end + end end end end name.force_encoding(encoding) body.force_encoding(encoding) end def handle_empty_content!(content) if content.nil? || content.empty? - raise EOFError + raise EmptyContentError end end end end end