module Nyara
# a part in multipart
# for an easy introduction, http://msdn.microsoft.com/en-us/library/ms526943(v=exchg.10).aspx
#
# - todo make it possible to store data into /tmp (this requires memory threshold counting)
# - todo nested multipart?
class Part < ParamHash
  # known Content-Transfer-Encoding mechanisms
  MECHANISMS = %w[base64 quoted-printable 7bit 8bit binary].each(&:freeze).freeze

  # rfc2616
  #
  #   token      := 1*<any CHAR except CTLs or separators>
  #   separators := "(" | ")" | "<" | ">" | "@"
  #               | "," | ";" | ":" | "\" | <">
  #               | "/" | "[" | "]" | "?" | "="
  #               | "{" | "}" | " " | "\t"
  #   CTL        := <any US-ASCII control character (octets 0 - 31) and DEL (127)>
  #
  TOKEN = /[^\x00-\x1f\x7f()<>@,;:\\"\/\[\]?=\{\}\ \t]+/ni

  # rfc5987
  #
  #   attr-char := ALPHA / DIGIT ; rfc5234
  #              / "!" / "#" / "$" / "&" / "+" / "-" / "."
  #              / "^" / "_" / "`" / "|" / "~"
  #
  # NOTE the "#" is escaped on purpose: an unescaped "#" followed by "$&" inside
  # a regexp literal is interpolation of the last-match global, which would
  # drop "#", "$" and "&" from the character class
  ATTR_CHAR = /[a-z0-9!\#$&+\-\.\^_`|~]/ni

  # rfc5987 (NOTE rfc2231 param continuations is not recommended)
  #
  #   ext-value   := charset "'" [ language ] "'" value-chars
  #   value-chars := *( pct-encoded / attr-char )
  #   pct-encoded := "%" HEXDIG HEXDIG
  #
  # captures: 1 - param name, 2 - quoted string, 3 - token, 4 - charset, 5 - value-chars
  EX_PARAM = /\s*;\s*(filename|name)\s*(?:
    = \s* "((?>\\"|[^"])*)"       # quoted string - 2
    | = \s* (#{TOKEN})            # token - 3
    | \*= \s* ([\w\-]+)           # charset - 4
      '[\w\-]*'                   # language (optional)
      ((?>%\h\h|#{ATTR_CHAR})+)   # value-chars - 5
  )/xni

  # one escape of quoted-printable: "=XX" hex octet (captured) or "=CRLF" soft line break
  QP_ESCAPE = /=(?:(\h\h)|\r\n)/n

  # analyse given +head+ and build a param hash representing the part
  #
  # +head+ is looked up with the keys 'Content-Transfer-Encoding', 'Content-Type',
  # 'Content-Disposition' and 'Content-Id'
  #
  # [head]      header
  # [mechanism] 7bit, 8bit, binary, base64, or quoted-printable (stripped and downcased)
  # [type]      mime type (parameters after ";" are dropped)
  # [data]      decoded data (incomplete before Part#final called)
  # [filename]  basename of uploaded data
  # [name]      param name, falls back to Content-Id
  #
  def initialize head
    self['head'] = head

    if mechanism = head['Content-Transfer-Encoding']
      self['mechanism'] = mechanism.strip.downcase
    end

    # keep only the part before the first ";"
    type = head['Content-Type']
    self['type'] = type && type[/.*?(?=;|$)/]

    # String.new gives a fresh mutable buffer even when string literals are frozen
    self['data'] = String.new.force_encoding('binary')

    if disposition = head['Content-Disposition']
      # skip first token
      ex_params = disposition.sub TOKEN, ''
      # store values not so specific as encoded value
      tmp_values = {}
      ex_params.scan EX_PARAM do |name, v1, v2, enc, v3|
        name = name.downcase
        if enc
          # value with charset and lang is more specific
          self[name] ||= enc_unescape enc, v3
        else
          # quoted strings are taken verbatim, bare tokens are leniently url-decoded
          tmp_values[name] ||= (v1 || (CGI.unescape(v2) rescue nil))
        end
      end
      self['filename'] ||= tmp_values['filename']
      self['name'] ||= tmp_values['name']
    end

    if self['filename']
      self['filename'] = File.basename self['filename']
    end
    self['name'] ||= head['Content-Id']
  end

  # feed a chunk of the raw body, decoded output is appended to self['data'],
  # an incomplete tail is kept in self['tmp'] until the next chunk or Part#final
  #
  # prereq: +raw+ in binary encoding
  #
  # +raw+ itself is never modified nor retained for later mutation
  def update raw
    case self['mechanism']
    when 'base64'
      # rfc2045#section-6.8, white spaces carry no data
      chunk = raw.gsub(/\s+/n, '')
      buf = (pending = self['tmp']) ? pending << chunk : chunk
      # last part can be at most 4 bytes and 2 '='s, hold it back for Part#final
      # and decode only whole 4-byte groups before it
      size = (buf.bytesize - 6) / 4 * 4
      if size >= 4
        self['data'] << buf.slice!(0, size).unpack('m').first
      end
      self['tmp'] = buf
    when 'quoted-printable'
      # http://en.wikipedia.org/wiki/Quoted-printable
      buf = (pending = self['tmp']) ? pending << raw : raw.dup
      if i = buf.rindex("\r\n")
        # decode everything up to and including the last CRLF, so that neither
        # an "=XX" escape nor an "=CRLF" soft break is cut in the middle
        self['data'] << decode_quoted_printable(buf.slice!(0, i + 2))
      end
      self['tmp'] = buf
    else # '7bit', '8bit', 'binary', ...
      self['data'] << raw
    end
  end

  # decode what is left in self['tmp'] into self['data'] and remove the 'tmp' entry
  #
  # returns self
  def final
    case self['mechanism']
    when 'base64'
      if tmp = self['tmp']
        self['data'] << tmp.unpack('m').first
        delete 'tmp'
      end
    when 'quoted-printable'
      if tmp = self['tmp']
        self['data'] << decode_quoted_printable(tmp)
        delete 'tmp'
      end
    end
    self
  end

  # ---
  # private
  # +++

  # decode rfc5987 value-chars +v+ which is declared to be in charset +enc+
  #
  # only "%XX" is decoded, "+" is a literal attr-char here and must stay as is.
  # returns the value transcoded to utf-8, the value is left untranscoded when
  # +enc+ is not a known encoding name, and nil is returned when decoding fails
  def enc_unescape enc, v
    encoding = begin
      Encoding.find enc
    rescue ArgumentError
      nil
    end
    v = v.dup.force_encoding('binary').gsub(/%(\h\h)/n) { [$1].pack 'H2' }
    if encoding
      v.force_encoding(encoding).encode!('utf-8')
    else
      # unknown charset: tag as utf-8 only if the bytes are valid utf-8
      v.force_encoding('utf-8')
      v.force_encoding('binary') unless v.valid_encoding?
    end
    v
  rescue
    nil
  end

  private

  # replace "=XX" with the octet and remove "=CRLF" soft line breaks, returns a new string
  def decode_quoted_printable str
    str.gsub(QP_ESCAPE) { $1 ? [$1].pack('H2') : '' }
  end
end
end