lib/rack/multipart/parser.rb in rack-2.2.10 vs lib/rack/multipart/parser.rb in rack-3.0.0.beta1
- old
+ new
@@ -1,27 +1,55 @@
# frozen_string_literal: true
require 'strscan'
+require_relative '../utils'
+
module Rack
module Multipart
class MultipartPartLimitError < Errno::EMFILE; end
- class MultipartTotalPartLimitError < StandardError; end
- class Parser
- (require_relative '../core_ext/regexp'; using ::Rack::RegexpExtensions) if RUBY_VERSION < '2.4'
+ # Use a specific error class when parsing a multipart request
+ # that ends early.
+ class EmptyContentError < ::EOFError; end
+ # Base class for multipart exceptions that do not subclass from
+ # other exception classes for backwards compatibility.
+ class Error < StandardError; end
+
+ EOL = "\r\n"
+ MULTIPART = %r|\Amultipart/.*boundary=\"?([^\";,]+)\"?|ni
+ TOKEN = /[^\s()<>,;:\\"\/\[\]?=]+/
+ CONDISP = /Content-Disposition:\s*#{TOKEN}\s*/i
+ VALUE = /"(?:\\"|[^"])*"|#{TOKEN}/
+ BROKEN = /^#{CONDISP}.*;\s*filename=(#{VALUE})/i
+ MULTIPART_CONTENT_TYPE = /Content-Type: (.*)#{EOL}/ni
+ MULTIPART_CONTENT_DISPOSITION = /Content-Disposition:.*;\s*name=(#{VALUE})/ni
+ MULTIPART_CONTENT_ID = /Content-ID:\s*([^#{EOL}]*)/ni
+ # Updated definitions from RFC 2231
+ ATTRIBUTE_CHAR = %r{[^ \t\v\n\r)(><@,;:\\"/\[\]?='*%]}
+ ATTRIBUTE = /#{ATTRIBUTE_CHAR}+/
+ SECTION = /\*[0-9]+/
+ REGULAR_PARAMETER_NAME = /#{ATTRIBUTE}#{SECTION}?/
+ REGULAR_PARAMETER = /(#{REGULAR_PARAMETER_NAME})=(#{VALUE})/
+ EXTENDED_OTHER_NAME = /#{ATTRIBUTE}\*[1-9][0-9]*\*/
+ EXTENDED_OTHER_VALUE = /%[0-9a-fA-F]{2}|#{ATTRIBUTE_CHAR}/
+ EXTENDED_OTHER_PARAMETER = /(#{EXTENDED_OTHER_NAME})=(#{EXTENDED_OTHER_VALUE}*)/
+ EXTENDED_INITIAL_NAME = /#{ATTRIBUTE}(?:\*0)?\*/
+ EXTENDED_INITIAL_VALUE = /[a-zA-Z0-9\-]*'[a-zA-Z0-9\-]*'#{EXTENDED_OTHER_VALUE}*/
+ EXTENDED_INITIAL_PARAMETER = /(#{EXTENDED_INITIAL_NAME})=(#{EXTENDED_INITIAL_VALUE})/
+ EXTENDED_PARAMETER = /#{EXTENDED_INITIAL_PARAMETER}|#{EXTENDED_OTHER_PARAMETER}/
+ DISPPARM = /;\s*(?:#{REGULAR_PARAMETER}|#{EXTENDED_PARAMETER})\s*/
+ RFC2183 = /^#{CONDISP}(#{DISPPARM})+$/i
+
+ class Parser
BUFSIZE = 1_048_576
TEXT_PLAIN = "text/plain"
TEMPFILE_FACTORY = lambda { |filename, content_type|
- extension = ::File.extname(filename.gsub("\0", '%00'))[0, 129]
-
- Tempfile.new(["RackMultipart", extension])
+ Tempfile.new(["RackMultipart", ::File.extname(filename.gsub("\0", '%00'))])
}
- BOUNDARY_REGEX = /\A([^\n]*(?:\n|\Z))/
-
class BoundedIO # :nodoc:
def initialize(io, content_length)
@io = io
@content_length = content_length
@cursor = 0
@@ -39,20 +67,16 @@
end
if str
@cursor += str.bytesize
else
- # Raise an error for mismatching Content-Length and actual contents
+ # Raise an error for mismatching content-length and actual contents
raise EOFError, "bad content body"
end
str
end
-
- def rewind
- @io.rewind
- end
end
MultipartInfo = Struct.new :params, :tmp_files
EMPTY = MultipartInfo.new(nil, [])
@@ -67,22 +91,21 @@
return EMPTY if 0 == content_length
boundary = parse_boundary content_type
return EMPTY unless boundary
+ if boundary.length > 70
+ # RFC 1521 Section 7.2.1 imposes a 70 character maximum for the boundary.
+ # Most clients use no more than 55 characters.
+ raise Error, "multipart boundary size too large (#{boundary.length} characters)"
+ end
+
io = BoundedIO.new(io, content_length) if content_length
- outbuf = String.new
parser = new(boundary, tmpfile, bufsize, qp)
- parser.on_read io.read(bufsize, outbuf)
+ parser.parse(io)
- loop do
- break if parser.state == :DONE
- parser.on_read io.read(bufsize, outbuf)
- end
-
- io.rewind
parser.result
end
class Collector
class MimePart < Struct.new(:body, :head, :filename, :content_type, :name)
@@ -141,11 +164,11 @@
klass = BufferPart
end
@mime_parts[mime_index] = klass.new(body, head, filename, content_type, name)
- check_part_limits
+ check_open_files
end
# Appends a chunk of body data to the part stored at +mime_index+.
# Returns whatever the part body's << returns.
def on_mime_body(mime_index, content)
  part = @mime_parts[mime_index]
  part.body << content
end
@@ -153,97 +176,104 @@
# Intentionally empty: accepts the index of a part and does nothing with it.
def on_mime_finish(mime_index)
end
private
- def check_part_limits
- file_limit = Utils.multipart_file_limit
- part_limit = Utils.multipart_total_part_limit
-
- if file_limit && file_limit > 0
- if @open_files >= file_limit
+ def check_open_files
+ if Utils.multipart_part_limit > 0
+ if @open_files >= Utils.multipart_part_limit
@mime_parts.each(&:close)
raise MultipartPartLimitError, 'Maximum file multiparts in content reached'
end
end
-
- if part_limit && part_limit > 0
- if @mime_parts.size >= part_limit
- @mime_parts.each(&:close)
- raise MultipartTotalPartLimitError, 'Maximum total multiparts in content reached'
- end
- end
end
end
attr_reader :state
def initialize(boundary, tempfile, bufsize, query_parser)
@query_parser = query_parser
@params = query_parser.make_params
- @boundary = "--#{boundary}"
@bufsize = bufsize
- @full_boundary = @boundary
- @end_boundary = @boundary + '--'
@state = :FAST_FORWARD
@mime_index = 0
@collector = Collector.new tempfile
@sbuf = StringScanner.new("".dup)
- @body_regex = /(?:#{EOL})?#{Regexp.quote(@boundary)}(?:#{EOL}|--)/m
- @end_boundary_size = boundary.bytesize + 6 # (-- at start, -- at finish, EOL at end)
- @rx_max_size = EOL.size + @boundary.bytesize + [EOL.size, '--'.size].max
+ @body_regex = /(?:#{EOL}|\A)--#{Regexp.quote(boundary)}(?:#{EOL}|--)/m
+ @rx_max_size = boundary.bytesize + 6 # (\r\n-- at start, either \r\n or -- at finish)
@head_regex = /(.*?#{EOL})#{EOL}/m
end
- def on_read(content)
- handle_empty_content!(content)
- @sbuf.concat content
- run_parser
+ def parse(io)
+ outbuf = String.new
+ read_data(io, outbuf)
+
+ loop do
+ status =
+ case @state
+ when :FAST_FORWARD
+ handle_fast_forward
+ when :CONSUME_TOKEN
+ handle_consume_token
+ when :MIME_HEAD
+ handle_mime_head
+ when :MIME_BODY
+ handle_mime_body
+ else # when :DONE
+ return
+ end
+
+ read_data(io, outbuf) if status == :want_read
+ end
end
def result
@collector.each do |part|
part.get_data do |data|
tag_multipart_encoding(part.filename, part.content_type, part.name, data)
- @query_parser.normalize_params(@params, part.name, data, @query_parser.param_depth_limit)
+ @query_parser.normalize_params(@params, part.name, data)
end
end
MultipartInfo.new @params.to_params_hash, @collector.find_all(&:file?).map(&:body)
end
private
- def run_parser
- loop do
- case @state
- when :FAST_FORWARD
- break if handle_fast_forward == :want_read
- when :CONSUME_TOKEN
- break if handle_consume_token == :want_read
- when :MIME_HEAD
- break if handle_mime_head == :want_read
- when :MIME_BODY
- break if handle_mime_body == :want_read
- when :DONE
- break
- end
- end
+ def dequote(str) # From WEBrick::HTTPUtils
+ ret = (/\A"(.*)"\Z/ =~ str) ? $1 : str.dup
+ ret.gsub!(/\\(.)/, "\\1")
+ ret
end
- def handle_fast_forward
- tok = consume_boundary
+ def read_data(io, outbuf)
+ content = io.read(@bufsize, outbuf)
+ handle_empty_content!(content)
+ @sbuf.concat(content)
+ end
- if tok == :END_BOUNDARY && @sbuf.pos == @end_boundary_size && @sbuf.eos?
- # stop parsing a buffer if a buffer is only an end boundary.
- @state = :DONE
- elsif tok
- @state = :MIME_HEAD
- else
- raise EOFError, "bad content body" if @sbuf.rest_size >= @bufsize
- :want_read
+ # This handles the initial parser state. We read until we find the starting
+ # boundary, then we can transition to the next state. If we find the ending
+ # boundary, this is an invalid multipart upload, but we keep scanning for the
+ # opening boundary in that case. If no boundary is found, we need to keep
+ # reading data and retry. It's highly unlikely the initial read will not
+ # consume the boundary. The client would have to deliberately craft a request
+ # with the opening boundary beyond the buffer size for that to happen.
+ def handle_fast_forward
+ while true
+ case consume_boundary
+ when :BOUNDARY
+ # found opening boundary, transition to next state
+ @state = :MIME_HEAD
+ return
+ when :END_BOUNDARY
+ # invalid multipart upload, but retry for opening boundary
+ else
+ # no boundary found, keep reading data
+ return :want_read
+ end
end
end
def handle_consume_token
tok = consume_boundary
@@ -258,11 +288,11 @@
def handle_mime_head
if @sbuf.scan_until(@head_regex)
head = @sbuf[1]
content_type = head[MULTIPART_CONTENT_TYPE, 1]
if name = head[MULTIPART_CONTENT_DISPOSITION, 1]
- name = Rack::Auth::Digest::Params::dequote(name)
+ name = dequote(name)
else
name = head[MULTIPART_CONTENT_ID, 1]
end
filename = get_filename(head)
@@ -295,32 +325,33 @@
end
:want_read
end
end
- def full_boundary; @full_boundary; end
-
+ # Scan until we find the start or end of the boundary.
+ # If we find it, return the appropriate symbol for the start or
+ # end of the boundary. If we don't find the start or end of the
+ # boundary, clear the buffer and return nil.
def consume_boundary
- while read_buffer = @sbuf.scan_until(BOUNDARY_REGEX)
- case read_buffer.strip
- when full_boundary then return :BOUNDARY
- when @end_boundary then return :END_BOUNDARY
- end
- return if @sbuf.eos?
+ if read_buffer = @sbuf.scan_until(@body_regex)
+ read_buffer.end_with?(EOL) ? :BOUNDARY : :END_BOUNDARY
+ else
+ @sbuf.terminate
+ nil
end
end
def get_filename(head)
filename = nil
case head
when RFC2183
params = Hash[*head.scan(DISPPARM).flat_map(&:compact)]
- if filename = params['filename']
- filename = $1 if filename =~ /^"(.*)"$/
- elsif filename = params['filename*']
+ if filename = params['filename*']
encoding, _, filename = filename.split("'", 3)
+ elsif filename = params['filename']
+ filename = $1 if filename =~ /^"(.*)"$/
end
when BROKEN
filename = $1
filename = $1 if filename =~ /^"(.*)"$/
end
@@ -343,10 +374,11 @@
filename
end
CHARSET = "charset"
+ deprecate_constant :CHARSET
def tag_multipart_encoding(filename, content_type, name, body)
name = name.to_s
encoding = Encoding::UTF_8
@@ -363,21 +395,27 @@
rest.each do |param|
k, v = param.split('=', 2)
k.strip!
v.strip!
v = v[1..-2] if v.start_with?('"') && v.end_with?('"')
- encoding = Encoding.find v if k == CHARSET
+ if k == "charset"
+ encoding = begin
+ Encoding.find v
+ rescue ArgumentError
+ Encoding::BINARY
+ end
+ end
end
end
end
name.force_encoding(encoding)
body.force_encoding(encoding)
end
def handle_empty_content!(content)
if content.nil? || content.empty?
- raise EOFError
+ raise EmptyContentError
end
end
end
end
end