# frozen_string_literal: true require_relative "errors" require_relative "response_parser/parser_utils" module Net class IMAP < Protocol # Parses an \IMAP server response. class ResponseParser include ParserUtils extend ParserUtils::Generator # :call-seq: Net::IMAP::ResponseParser.new -> Net::IMAP::ResponseParser def initialize @str = nil @pos = nil @lex_state = nil @token = nil end # :call-seq: # parse(str) -> ContinuationRequest # parse(str) -> UntaggedResponse # parse(str) -> TaggedResponse # # Raises ResponseParseError for unparsable strings. def parse(str) @str = str @pos = 0 @lex_state = EXPR_BEG @token = nil return response end private # :stopdoc: EXPR_BEG = :EXPR_BEG # the default, used in most places EXPR_DATA = :EXPR_DATA # envelope, body(structure), namespaces T_SPACE = :SPACE # atom special T_ATOM = :ATOM # atom (subset of astring chars) T_NIL = :NIL # subset of atom and label T_NUMBER = :NUMBER # subset of atom T_LBRA = :LBRA # subset of atom T_PLUS = :PLUS # subset of atom; tag special T_RBRA = :RBRA # atom special; resp_special; valid astring char T_QUOTED = :QUOTED # starts/end with atom special T_BSLASH = :BSLASH # atom special; quoted special T_LPAR = :LPAR # atom special; paren list delimiter T_RPAR = :RPAR # atom special; paren list delimiter T_STAR = :STAR # atom special; list wildcard T_PERCENT = :PERCENT # atom special; list wildcard T_LITERAL = :LITERAL # starts with atom special T_CRLF = :CRLF # atom special; text special; quoted special T_TEXT = :TEXT # any char except CRLF T_EOF = :EOF # end of response string module Patterns module CharClassSubtraction refine Regexp do def -(rhs); /[#{source}&&[^#{rhs.source}]]/n.freeze end end end using CharClassSubtraction # From RFC5234, "Augmented BNF for Syntax Specifications: ABNF" # >>> # ALPHA = %x41-5A / %x61-7A ; A-Z / a-z # CHAR = %x01-7F # CRLF = CR LF # ; Internet standard newline # CTL = %x00-1F / %x7F # ; controls # DIGIT = %x30-39 # ; 0-9 # DQUOTE = %x22 # ; " (Double Quote) # HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F" # OCTET = %x00-FF # SP = %x20 module RFC5234 ALPHA = /[A-Za-z]/n CHAR = /[\x01-\x7f]/n CRLF = /\r\n/n CTL = /[\x00-\x1F\x7F]/n DIGIT = /\d/n DQUOTE = /"/n HEXDIG = /\h/ OCTET = /[\x00-\xFF]/n # not using /./m for embedding purposes SP = / /n end # UTF-8, a transformation format of ISO 10646 # >>> # UTF8-1 = %x00-7F # UTF8-tail = %x80-BF # UTF8-2 = %xC2-DF UTF8-tail # UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) / # %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail ) # UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) / # %xF4 %x80-8F 2( UTF8-tail ) # UTF8-char = UTF8-1 / UTF8-2 / UTF8-3 / UTF8-4 # UTF8-octets = *( UTF8-char ) # # n.b. String * Integer is used for repetition, rather than /x{3}/, # because ruby 3.2's linear-time cache-based optimization doesn't work # with "bounded or fixed times repetition nesting in another repetition # (e.g. /(a{2,3})*/). It is an implementation issue entirely, but we # believe it is hard to support this case correctly." # See https://bugs.ruby-lang.org/issues/19104 module RFC3629 UTF8_1 = /[\x00-\x7f]/n # aka ASCII 7bit UTF8_TAIL = /[\x80-\xBF]/n UTF8_2 = /[\xC2-\xDF]#{UTF8_TAIL}/n UTF8_3 = Regexp.union(/\xE0[\xA0-\xBF]#{UTF8_TAIL}/n, /\xED[\x80-\x9F]#{UTF8_TAIL}/n, /[\xE1-\xEC]#{ UTF8_TAIL.source * 2}/n, /[\xEE-\xEF]#{ UTF8_TAIL.source * 2}/n) UTF8_4 = Regexp.union(/[\xF1-\xF3]#{ UTF8_TAIL.source * 3}/n, /\xF0[\x90-\xBF]#{UTF8_TAIL.source * 2}/n, /\xF4[\x80-\x8F]#{UTF8_TAIL.source * 2}/n) UTF8_CHAR = Regexp.union(UTF8_1, UTF8_2, UTF8_3, UTF8_4) UTF8_OCTETS = /#{UTF8_CHAR}*/n end include RFC5234 include RFC3629 # CHAR8 = %x01-ff # ; any OCTET except NUL, %x00 CHAR8 = /[\x01-\xff]/n # list-wildcards = "%" / "*" LIST_WILDCARDS = /[%*]/n # quoted-specials = DQUOTE / "\" QUOTED_SPECIALS = /["\\]/n # resp-specials = "]" RESP_SPECIALS = /[\]]/n # atomish = 1* # ; We use "atomish" for msg-att and section, in order # ; to simplify "BODY[HEADER.FIELDS (foo bar)]". # # atom-specials = "(" / ")" / "{" / SP / CTL / list-wildcards / # quoted-specials / resp-specials # ATOM-CHAR = # atom = 1*ATOM-CHAR # ASTRING-CHAR = ATOM-CHAR / resp-specials # tag = 1* ATOM_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\\]]/n ASTRING_SPECIALS = /[(){ \x00-\x1f\x7f%*"\\]/n ASTRING_CHAR = CHAR - ASTRING_SPECIALS ATOM_CHAR = CHAR - ATOM_SPECIALS ATOM = /#{ATOM_CHAR}+/n ASTRING_CHARS = /#{ASTRING_CHAR}+/n ATOMISH = /#{ATOM_CHAR - /[\[]/ }+/ TAG = /#{ASTRING_CHAR - /[+]/ }+/ # TEXT-CHAR = TEXT_CHAR = CHAR - /[\r\n]/ # resp-text-code = ... / atom [SP 1*] CODE_TEXT_CHAR = TEXT_CHAR - RESP_SPECIALS CODE_TEXT = /#{CODE_TEXT_CHAR}+/n # RFC3501: # QUOTED-CHAR = / # "\" quoted-specials # RFC9051: # QUOTED-CHAR = / # "\" quoted-specials / UTF8-2 / UTF8-3 / UTF8-4 # RFC3501 & RFC9051: # quoted = DQUOTE *QUOTED-CHAR DQUOTE QUOTED_CHAR_safe = TEXT_CHAR - QUOTED_SPECIALS QUOTED_CHAR_esc = /\\#{QUOTED_SPECIALS}/n QUOTED_CHAR_rev1 = Regexp.union(QUOTED_CHAR_safe, QUOTED_CHAR_esc) QUOTED_CHAR_rev2 = Regexp.union(QUOTED_CHAR_rev1, UTF8_2, UTF8_3, UTF8_4) QUOTED_rev1 = /"(#{QUOTED_CHAR_rev1}*)"/n QUOTED_rev2 = /"(#{QUOTED_CHAR_rev2}*)"/n # RFC3501: # text = 1*TEXT-CHAR # RFC9051: # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4) # ; Non-ASCII text can only be returned # ; after ENABLE IMAP4rev2 command TEXT_rev1 = /#{TEXT_CHAR}+/ TEXT_rev2 = /#{Regexp.union TEXT_CHAR, UTF8_2, UTF8_3, UTF8_4}+/ # RFC3501: # literal = "{" number "}" CRLF *CHAR8 # ; Number represents the number of CHAR8s # RFC9051: # literal = "{" number64 ["+"] "}" CRLF *CHAR8 # ; represents the number of CHAR8s. # ; A non-synchronizing literal is distinguished # ; from a synchronizing literal by the presence of # ; "+" before the closing "}". # ; Non-synchronizing literals are not allowed when # ; sent from server to the client. LITERAL = /\{(\d+)\}\r\n/n module_function def unescape_quoted!(quoted) quoted &.gsub!(/\\(#{QUOTED_SPECIALS})/n, "\\1") &.force_encoding("UTF-8") end def unescape_quoted(quoted) quoted &.gsub(/\\(#{QUOTED_SPECIALS})/n, "\\1") &.force_encoding("UTF-8") end end # the default, used in most places BEG_REGEXP = /\G(?:\ (?# 1: SPACE )( )|\ (?# 2: ATOM prefixed with a compatible subtype)\ ((?:\ (?# 3: NIL )(NIL)|\ (?# 4: NUMBER )(\d+)|\ (?# 5: PLUS )(\+))\ (?# 6: ATOM remaining after prefix )(#{Patterns::ATOMISH})?\ (?# This enables greedy alternation without lookahead, in linear time.)\ )|\ (?# Also need to check for ATOM without a subtype prefix.)\ (?# 7: ATOM )(#{Patterns::ATOMISH})|\ (?# 8: QUOTED )#{Patterns::QUOTED_rev2}|\ (?# 9: LPAR )(\()|\ (?# 10: RPAR )(\))|\ (?# 11: BSLASH )(\\)|\ (?# 12: STAR )(\*)|\ (?# 13: LBRA )(\[)|\ (?# 14: RBRA )(\])|\ (?# 15: LITERAL )#{Patterns::LITERAL}|\ (?# 16: PERCENT )(%)|\ (?# 17: CRLF )(\r\n)|\ (?# 18: EOF )(\z))/ni # envelope, body(structure), namespaces DATA_REGEXP = /\G(?:\ (?# 1: SPACE )( )|\ (?# 2: NIL )(NIL)|\ (?# 3: NUMBER )(\d+)|\ (?# 4: QUOTED )#{Patterns::QUOTED_rev2}|\ (?# 5: LITERAL )#{Patterns::LITERAL}|\ (?# 6: LPAR )(\()|\ (?# 7: RPAR )(\)))/ni # text, after 'resp-text-code "]"' TEXT_REGEXP = /\G(#{Patterns::TEXT_rev2})/n # resp-text-code, after 'atom SP' CTEXT_REGEXP = /\G(#{Patterns::CODE_TEXT})/n Token = Struct.new(:symbol, :value) def_char_matchers :SP, " ", :T_SPACE def_char_matchers :lpar, "(", :T_LPAR def_char_matchers :rpar, ")", :T_RPAR def_char_matchers :lbra, "[", :T_LBRA def_char_matchers :rbra, "]", :T_RBRA # valid number ranges are not enforced by parser # number = 1*DIGIT # ; Unsigned 32-bit integer # ; (0 <= n < 4,294,967,296) def_token_matchers :number, T_NUMBER, coerce: Integer def_token_matchers :quoted, T_QUOTED # string = quoted / literal def_token_matchers :string, T_QUOTED, T_LITERAL # use where string represents "LABEL" values def_token_matchers :case_insensitive__string, T_QUOTED, T_LITERAL, send: :upcase # n.b: NIL? and NIL! return the "NIL" atom string (truthy) on success. # NIL? returns nil when it does *not* match def_token_matchers :NIL, T_NIL # In addition to explicitly uses of +tagged-ext-label+, use this to match # keywords when the grammar has not provided any extension syntax. # # Do *not* use this for labels where the grammar specifies extensions # can be +atom+, even if all currently defined labels would match. For # example response codes in +resp-text-code+. # # tagged-ext-label = tagged-label-fchar *tagged-label-char # ; Is a valid RFC 3501 "atom". # tagged-label-fchar = ALPHA / "-" / "_" / "." # tagged-label-char = tagged-label-fchar / DIGIT / ":" # # TODO: add to lexer and only match tagged-ext-label def_token_matchers :tagged_ext_label, T_ATOM, T_NIL, send: :upcase # atom = 1*ATOM-CHAR # ATOM-CHAR = ATOM_TOKENS = [T_ATOM, T_NUMBER, T_NIL, T_LBRA, T_PLUS] # ASTRING-CHAR = ATOM-CHAR / resp-specials # resp-specials = "]" ASTRING_CHARS_TOKENS = [*ATOM_TOKENS, T_RBRA].freeze ASTRING_TOKENS = [T_QUOTED, *ASTRING_CHARS_TOKENS, T_LITERAL].freeze # atom = 1*ATOM-CHAR # # TODO: match atom entirely by regexp (in the "lexer") def atom; -combine_adjacent(*ATOM_TOKENS) end # the #accept version of #atom def atom?; -combine_adjacent(*ATOM_TOKENS) if lookahead?(*ATOM_TOKENS) end # Returns atom.upcase def case_insensitive__atom; -combine_adjacent(*ATOM_TOKENS).upcase end # Returns atom?&.upcase def case_insensitive__atom? -combine_adjacent(*ATOM_TOKENS).upcase if lookahead?(*ATOM_TOKENS) end # TODO: handle astring_chars entirely inside the lexer def astring_chars combine_adjacent(*ASTRING_CHARS_TOKENS) end # astring = 1*ASTRING-CHAR / string def astring lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string end def astring? lookahead?(*ASTRING_CHARS_TOKENS) ? astring_chars : string? end # Use #label or #label_in to assert specific known labels # (+tagged-ext-label+ only, not +atom+). def label(word) (val = tagged_ext_label) == word and return val parse_error("unexpected atom %p, expected %p instead", val, word) end # nstring = string / nil def nstring NIL? ? nil : string end def nquoted NIL? ? nil : quoted end # use where nstring represents "LABEL" values def case_insensitive__nstring NIL? ? nil : case_insensitive__string end # valid number ranges are not enforced by parser # number64 = 1*DIGIT # ; Unsigned 63-bit integer # ; (0 <= n <= 9,223,372,036,854,775,807) alias number64 number alias number64? number? def response token = lookahead case token.symbol when T_PLUS result = continue_req when T_STAR result = response_untagged else result = response_tagged end while lookahead.symbol == T_SPACE # Ignore trailing space for Microsoft Exchange Server shift_token end match(T_CRLF) match(T_EOF) return result end def continue_req match(T_PLUS) token = lookahead if token.symbol == T_SPACE shift_token return ContinuationRequest.new(resp_text, @str) else return ContinuationRequest.new(ResponseText.new(nil, ""), @str) end end def response_untagged match(T_STAR) match(T_SPACE) token = lookahead if token.symbol == T_NUMBER return numeric_response elsif token.symbol == T_ATOM case token.value when /\A(?:OK|NO|BAD|BYE|PREAUTH)\z/ni return response_cond when /\A(?:FLAGS)\z/ni return flags_response when /\A(?:ID)\z/ni return id_response when /\A(?:LIST|LSUB|XLIST)\z/ni return list_response when /\A(?:NAMESPACE)\z/ni return namespace_response when /\A(?:QUOTA)\z/ni return getquota_response when /\A(?:QUOTAROOT)\z/ni return getquotaroot_response when /\A(?:ACL)\z/ni return getacl_response when /\A(?:SEARCH|SORT)\z/ni return search_response when /\A(?:THREAD)\z/ni return thread_response when /\A(?:STATUS)\z/ni return status_response when /\A(?:CAPABILITY)\z/ni return capability_data__untagged when /\A(?:NOOP)\z/ni return ignored_response when /\A(?:ENABLED)\z/ni return enable_data else return text_response end else parse_error("unexpected token %s", token.symbol) end end def response_tagged tag = astring_chars match(T_SPACE) token = match(T_ATOM) name = token.value.upcase match(T_SPACE) return TaggedResponse.new(tag, name, resp_text, @str) end def response_cond token = match(T_ATOM) name = token.value.upcase match(T_SPACE) return UntaggedResponse.new(name, resp_text, @str) end def numeric_response n = number match(T_SPACE) token = match(T_ATOM) name = token.value.upcase case name when "EXISTS", "RECENT", "EXPUNGE" return UntaggedResponse.new(name, n, @str) when "FETCH" shift_token match(T_SPACE) data = FetchData.new(n, msg_att(n)) return UntaggedResponse.new(name, data, @str) end end def msg_att(n) match(T_LPAR) attr = {} while true token = lookahead case token.symbol when T_RPAR shift_token break when T_SPACE shift_token next end case token.value when /\A(?:ENVELOPE)\z/ni name, val = envelope_data when /\A(?:FLAGS)\z/ni name, val = flags_data when /\A(?:INTERNALDATE)\z/ni name, val = internaldate_data when /\A(?:RFC822(?:\.HEADER|\.TEXT)?)\z/ni name, val = rfc822_text when /\A(?:RFC822\.SIZE)\z/ni name, val = rfc822_size when /\A(?:BODY(?:STRUCTURE)?)\z/ni name, val = body_data when /\A(?:UID)\z/ni name, val = uid_data when /\A(?:MODSEQ)\z/ni name, val = modseq_data else parse_error("unknown attribute `%s' for {%d}", token.value, n) end attr[name] = val end return attr end def envelope_data token = match(T_ATOM) name = token.value.upcase match(T_SPACE) return name, envelope end def envelope @lex_state = EXPR_DATA token = lookahead if token.symbol == T_NIL shift_token result = nil else match(T_LPAR) date = nstring match(T_SPACE) subject = nstring match(T_SPACE) from = address_list match(T_SPACE) sender = address_list match(T_SPACE) reply_to = address_list match(T_SPACE) to = address_list match(T_SPACE) cc = address_list match(T_SPACE) bcc = address_list match(T_SPACE) in_reply_to = nstring match(T_SPACE) message_id = nstring match(T_RPAR) result = Envelope.new(date, subject, from, sender, reply_to, to, cc, bcc, in_reply_to, message_id) end @lex_state = EXPR_BEG return result end def flags_data token = match(T_ATOM) name = token.value.upcase match(T_SPACE) return name, flag_list end def internaldate_data token = match(T_ATOM) name = token.value.upcase match(T_SPACE) token = match(T_QUOTED) return name, token.value end def rfc822_text token = match(T_ATOM) name = token.value.upcase token = lookahead if token.symbol == T_LBRA shift_token match(T_RBRA) end match(T_SPACE) return name, nstring end def rfc822_size token = match(T_ATOM) name = token.value.upcase match(T_SPACE) return name, number end def body_data token = match(T_ATOM) name = token.value.upcase token = lookahead if token.symbol == T_SPACE shift_token return name, body end name.concat(section) token = lookahead if token.symbol == T_ATOM name.concat(token.value) shift_token end match(T_SPACE) data = nstring return name, data end # RFC-3501 & RFC-9051: # body = "(" (body-type-1part / body-type-mpart) ")" def body @lex_state = EXPR_DATA lpar; result = peek_lpar? ? body_type_mpart : body_type_1part; rpar result ensure @lex_state = EXPR_BEG end alias lookahead_body? lookahead_lpar? # RFC-3501 & RFC9051: # body-type-1part = (body-type-basic / body-type-msg / body-type-text) # [SP body-ext-1part] def body_type_1part # This regexp peek is a performance optimization. # The lookahead fallback would work fine too. m = peek_re(/\G(?: (? "TEXT" \s "[^"]+" ) |(? "MESSAGE" \s "(?:RFC822|GLOBAL)" ) |(? "[^"]+" \s "[^"]+" ) |(? "MIXED" ) )/nix) choice = m&.named_captures&.compact&.keys&.first # In practice, the following line should never be used. But the ABNF # *does* allow literals, and this will handle them. choice ||= lookahead_case_insensitive__string! case choice when "BASIC" then body_type_basic # => BodyTypeBasic when "MESSAGE" then body_type_msg # => BodyTypeMessage | BodyTypeBasic when "TEXT" then body_type_text # => BodyTypeText when "MIXED" then body_type_mixed # => BodyTypeMultipart (server bug) else body_type_basic # might be a bug; server's or ours? end end # RFC-3501 & RFC9051: # body-type-basic = media-basic SP body-fields def body_type_basic type = media_basic # n.b. "basic" type isn't enforced here if lookahead_rpar? then return BodyTypeBasic.new(*type) end # invalid SP!; flds = body_fields SP? and exts = body_ext_1part BodyTypeBasic.new(*type, *flds, *exts) end # RFC-3501 & RFC-9051: # body-type-text = media-text SP body-fields SP body-fld-lines def body_type_text type = media_text SP!; flds = body_fields SP!; lines = body_fld_lines SP? and exts = body_ext_1part BodyTypeText.new(*type, *flds, lines, *exts) end # RFC-3501 & RFC-9051: # body-type-msg = media-message SP body-fields SP envelope # SP body SP body-fld-lines def body_type_msg # n.b. "message/rfc822" type isn't enforced here type = media_message SP!; flds = body_fields # Sometimes servers send body-type-basic when body-type-msg should be. # E.g: when a message/rfc822 part has "Content-Disposition: attachment". # # * SP "(" --> SP envelope --> continue as body-type-msg # * ")" --> no body-ext-1part --> completed body-type-basic # * SP nstring --> SP body-fld-md5 # --> SP body-ext-1part --> continue as body-type-basic # # It's probably better to return BodyTypeBasic---even for # "message/rfc822"---than BodyTypeMessage with invalid fields. unless peek_str?(" (") SP? and exts = body_ext_1part return BodyTypeBasic.new(*type, *flds, *exts) end SP!; env = envelope SP!; bdy = body SP!; lines = body_fld_lines SP? and exts = body_ext_1part BodyTypeMessage.new(*type, *flds, env, bdy, lines, *exts) end # This is a malformed body-type-mpart with no subparts. def body_type_mixed # warn "malformed body-type-mpart: multipart/mixed with no parts." type = media_subtype # => "MIXED" SP? and exts = body_ext_mpart BodyTypeMultipart.new("MULTIPART", type, nil, *exts) end # RFC-3501 & RFC-9051: # body-type-mpart = 1*body SP media-subtype # [SP body-ext-mpart] def body_type_mpart parts = [body]; parts << body until SP?; msubtype = media_subtype SP? and exts = body_ext_mpart BodyTypeMultipart.new("MULTIPART", msubtype, parts, *exts) end # n.b. this handles both type and subtype # # RFC-3501 vs RFC-9051: # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" / # "MESSAGE" / # "VIDEO") DQUOTE) / string) SP media-subtype # media-basic = ((DQUOTE ("APPLICATION" / "AUDIO" / "IMAGE" / # "FONT" / "MESSAGE" / "MODEL" / # "VIDEO") DQUOTE) / string) SP media-subtype # # media-message = DQUOTE "MESSAGE" DQUOTE SP # DQUOTE "RFC822" DQUOTE # media-message = DQUOTE "MESSAGE" DQUOTE SP # DQUOTE ("RFC822" / "GLOBAL") DQUOTE # # RFC-3501 & RFC-9051: # media-text = DQUOTE "TEXT" DQUOTE SP media-subtype # media-subtype = string def media_type mtype = case_insensitive__string SP? or return mtype, nil # ??? quirky! msubtype = media_subtype return mtype, msubtype end # TODO: check types alias media_basic media_type # */* --- catchall alias media_message media_type # message/rfc822, message/global alias media_text media_type # text/* alias media_subtype case_insensitive__string # RFC-3501 & RFC-9051: # body-fields = body-fld-param SP body-fld-id SP body-fld-desc SP # body-fld-enc SP body-fld-octets def body_fields fields = [] fields << body_fld_param; SP! fields << body_fld_id; SP! fields << body_fld_desc; SP! fields << body_fld_enc; SP! fields << body_fld_octets fields end # RFC3501, RFC9051: # body-fld-param = "(" string SP string *(SP string SP string) ")" / nil def body_fld_param return if NIL? param = {} lpar name = case_insensitive__string; SP!; param[name] = string while SP? name = case_insensitive__string; SP!; param[name] = string end rpar param end # RFC2060 # body_ext_1part ::= body_fld_md5 [SPACE body_fld_dsp # [SPACE body_fld_lang # [SPACE 1#body_extension]]] # ;; MUST NOT be returned on non-extensible # ;; "BODY" fetch # RFC3501 & RFC9051 # body-ext-1part = body-fld-md5 [SP body-fld-dsp [SP body-fld-lang # [SP body-fld-loc *(SP body-extension)]]] # ; MUST NOT be returned on non-extensible # ; "BODY" fetch def body_ext_1part fields = []; fields << body_fld_md5 SP? or return fields; fields << body_fld_dsp SP? or return fields; fields << body_fld_lang SP? or return fields; fields << body_fld_loc SP? or return fields; fields << body_extensions fields end # RFC-2060: # body_ext_mpart = body_fld_param [SP body_fld_dsp SP body_fld_lang # [SP 1#body_extension]] # ;; MUST NOT be returned on non-extensible # ;; "BODY" fetch # RFC-3501 & RFC-9051: # body-ext-mpart = body-fld-param [SP body-fld-dsp [SP body-fld-lang # [SP body-fld-loc *(SP body-extension)]]] # ; MUST NOT be returned on non-extensible # ; "BODY" fetch def body_ext_mpart fields = []; fields << body_fld_param SP? or return fields; fields << body_fld_dsp SP? or return fields; fields << body_fld_lang SP? or return fields; fields << body_fld_loc SP? or return fields; fields << body_extensions fields end alias body_fld_desc nstring alias body_fld_id nstring alias body_fld_loc nstring alias body_fld_lines number64 # number in 3501, number64 in 9051 alias body_fld_md5 nstring alias body_fld_octets number # RFC-3501 & RFC-9051: # body-fld-enc = (DQUOTE ("7BIT" / "8BIT" / "BINARY" / "BASE64"/ # "QUOTED-PRINTABLE") DQUOTE) / string alias body_fld_enc case_insensitive__string # body-fld-dsp = "(" string SP body-fld-param ")" / nil def body_fld_dsp return if NIL? lpar; dsp_type = case_insensitive__string SP!; param = body_fld_param rpar ContentDisposition.new(dsp_type, param) end # body-fld-lang = nstring / "(" string *(SP string) ")" def body_fld_lang if lpar? result = [case_insensitive__string] result << case_insensitive__string while SP? result else case_insensitive__nstring end end # body-extension *(SP body-extension) def body_extensions result = [] result << body_extension; while SP? do result << body_extension end result end # body-extension = nstring / number / number64 / # "(" body-extension *(SP body-extension) ")" # ; Future expansion. Client implementations # ; MUST accept body-extension fields. Server # ; implementations MUST NOT generate # ; body-extension fields except as defined by # ; future Standard or Standards Track # ; revisions of this specification. def body_extension if (uint = number64?) then uint elsif lpar? then exts = body_extensions; rpar; exts else nstring end end def section str = String.new token = match(T_LBRA) str.concat(token.value) token = match(T_ATOM, T_NUMBER, T_RBRA) if token.symbol == T_RBRA str.concat(token.value) return str end str.concat(token.value) token = lookahead if token.symbol == T_SPACE shift_token str.concat(token.value) token = match(T_LPAR) str.concat(token.value) while true token = lookahead case token.symbol when T_RPAR str.concat(token.value) shift_token break when T_SPACE shift_token str.concat(token.value) end str.concat(format_string(astring)) end end token = match(T_RBRA) str.concat(token.value) return str end def format_string(str) case str when "" return '""' when /[\x80-\xff\r\n]/n # literal return "{" + str.bytesize.to_s + "}" + CRLF + str when /[(){ \x00-\x1f\x7f%*"\\]/n # quoted string return '"' + str.gsub(/["\\]/n, "\\\\\\&") + '"' else # atom return str end end def uid_data token = match(T_ATOM) name = token.value.upcase match(T_SPACE) return name, number end def modseq_data token = match(T_ATOM) name = token.value.upcase match(T_SPACE) match(T_LPAR) modseq = number match(T_RPAR) return name, modseq end def ignored_response while lookahead.symbol != T_CRLF shift_token end return IgnoredResponse.new(@str) end def text_response token = match(T_ATOM) name = token.value.upcase match(T_SPACE) return UntaggedResponse.new(name, text) end def flags_response token = match(T_ATOM) name = token.value.upcase match(T_SPACE) return UntaggedResponse.new(name, flag_list, @str) end def list_response token = match(T_ATOM) name = token.value.upcase match(T_SPACE) return UntaggedResponse.new(name, mailbox_list, @str) end def mailbox_list attr = flag_list match(T_SPACE) token = match(T_QUOTED, T_NIL) if token.symbol == T_NIL delim = nil else delim = token.value end match(T_SPACE) name = astring return MailboxList.new(attr, delim, name) end def getquota_response # If quota never established, get back # `NO Quota root does not exist'. # If quota removed, get `()' after the # folder spec with no mention of `STORAGE'. token = match(T_ATOM) name = token.value.upcase match(T_SPACE) mailbox = astring match(T_SPACE) match(T_LPAR) token = lookahead case token.symbol when T_RPAR shift_token data = MailboxQuota.new(mailbox, nil, nil) return UntaggedResponse.new(name, data, @str) when T_ATOM shift_token match(T_SPACE) token = match(T_NUMBER) usage = token.value match(T_SPACE) token = match(T_NUMBER) quota = token.value match(T_RPAR) data = MailboxQuota.new(mailbox, usage, quota) return UntaggedResponse.new(name, data, @str) else parse_error("unexpected token %s", token.symbol) end end def getquotaroot_response # Similar to getquota, but only admin can use getquota. token = match(T_ATOM) name = token.value.upcase match(T_SPACE) mailbox = astring quotaroots = [] while true token = lookahead break unless token.symbol == T_SPACE shift_token quotaroots.push(astring) end data = MailboxQuotaRoot.new(mailbox, quotaroots) return UntaggedResponse.new(name, data, @str) end def getacl_response token = match(T_ATOM) name = token.value.upcase match(T_SPACE) mailbox = astring data = [] token = lookahead if token.symbol == T_SPACE shift_token while true token = lookahead case token.symbol when T_CRLF break when T_SPACE shift_token end user = astring match(T_SPACE) rights = astring data.push(MailboxACLItem.new(user, rights, mailbox)) end end return UntaggedResponse.new(name, data, @str) end def search_response token = match(T_ATOM) name = token.value.upcase token = lookahead if token.symbol == T_SPACE shift_token data = [] while true token = lookahead case token.symbol when T_CRLF break when T_SPACE shift_token when T_NUMBER data.push(number) when T_LPAR # TODO: include the MODSEQ value in a response shift_token match(T_ATOM) match(T_SPACE) match(T_NUMBER) match(T_RPAR) end end else data = [] end return UntaggedResponse.new(name, data, @str) end def thread_response token = match(T_ATOM) name = token.value.upcase token = lookahead if token.symbol == T_SPACE threads = [] while true shift_token token = lookahead case token.symbol when T_LPAR threads << thread_branch(token) when T_CRLF break end end else # no member threads = [] end return UntaggedResponse.new(name, threads, @str) end def thread_branch(token) rootmember = nil lastmember = nil while true shift_token # ignore first T_LPAR token = lookahead case token.symbol when T_NUMBER # new member newmember = ThreadMember.new(number, []) if rootmember.nil? rootmember = newmember else lastmember.children << newmember end lastmember = newmember when T_SPACE # do nothing when T_LPAR if rootmember.nil? # dummy member lastmember = rootmember = ThreadMember.new(nil, []) end lastmember.children << thread_branch(token) when T_RPAR break end end return rootmember end def status_response token = match(T_ATOM) name = token.value.upcase match(T_SPACE) mailbox = astring match(T_SPACE) match(T_LPAR) attr = {} while true token = lookahead case token.symbol when T_RPAR shift_token break when T_SPACE shift_token end token = match(T_ATOM) key = token.value.upcase match(T_SPACE) val = number attr[key] = val end data = StatusData.new(mailbox, attr) return UntaggedResponse.new(name, data, @str) end # The presence of "IMAP4rev1" or "IMAP4rev2" is unenforced here. # The grammar rule is used by both response-data and resp-text-code. # But this method only returns UntaggedResponse (response-data). # # RFC3501: # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev1" # *(SP capability) # RFC9051: # capability-data = "CAPABILITY" *(SP capability) SP "IMAP4rev2" # *(SP capability) def capability_data__untagged UntaggedResponse.new label("CAPABILITY"), capability__list, @str end # enable-data = "ENABLED" *(SP capability) def enable_data UntaggedResponse.new label("ENABLED"), capability__list, @str end # As a workaround for buggy servers, allow a trailing SP: # *(SP capapility) [SP] def capability__list data = []; while _ = SP? && capability? do data << _ end; data end # capability = ("AUTH=" auth-type) / atom # ; New capabilities MUST begin with "X" or be # ; registered with IANA as standard or # ; standards-track alias capability case_insensitive__atom alias capability? case_insensitive__atom? def id_response token = match(T_ATOM) name = token.value.upcase match(T_SPACE) token = match(T_LPAR, T_NIL) if token.symbol == T_NIL return UntaggedResponse.new(name, nil, @str) else data = {} while true token = lookahead case token.symbol when T_RPAR shift_token break when T_SPACE shift_token next else key = string match(T_SPACE) val = nstring data[key] = val end end return UntaggedResponse.new(name, data, @str) end end # namespace-response = "NAMESPACE" SP namespace # SP namespace SP namespace # ; The first Namespace is the Personal Namespace(s). # ; The second Namespace is the Other Users' # ; Namespace(s). # ; The third Namespace is the Shared Namespace(s). def namespace_response name = label("NAMESPACE") @lex_state = EXPR_DATA data = Namespaces.new((SP!; namespace), (SP!; namespace), (SP!; namespace)) UntaggedResponse.new(name, data, @str) ensure @lex_state = EXPR_BEG end # namespace = nil / "(" 1*namespace-descr ")" def namespace NIL? and return [] lpar list = [namespace_descr] list << namespace_descr until rpar? list end # namespace-descr = "(" string SP # (DQUOTE QUOTED-CHAR DQUOTE / nil) # [namespace-response-extensions] ")" def namespace_descr lpar prefix = string; SP! delimiter = nquoted # n.b: should only accept single char extensions = namespace_response_extensions rpar Namespace.new(prefix, delimiter, extensions) end # namespace-response-extensions = *namespace-response-extension # namespace-response-extension = SP string SP # "(" string *(SP string) ")" def namespace_response_extensions data = {} while SP? name = string; SP! lpar data[name] ||= [] data[name] << string data[name] << string while SP? rpar end data end # TEXT-CHAR = # RFC3501: # text = 1*TEXT-CHAR # RFC9051: # text = 1*(TEXT-CHAR / UTF8-2 / UTF8-3 / UTF8-4) # ; Non-ASCII text can only be returned # ; after ENABLE IMAP4rev2 command def text match_re(TEXT_REGEXP, "text")[0].force_encoding("UTF-8") end # an "accept" versiun of #text def text? accept_re(TEXT_REGEXP)&.[](0)&.force_encoding("UTF-8") end # RFC3501: # resp-text = ["[" resp-text-code "]" SP] text # RFC9051: # resp-text = ["[" resp-text-code "]" SP] [text] # # We leniently re-interpret this as # resp-text = ["[" resp-text-code "]" [SP [text]] / [text] def resp_text if lbra? code = resp_text_code; rbra ResponseText.new(code, SP? && text? || "") else ResponseText.new(nil, text? || "") end end # See https://www.rfc-editor.org/errata/rfc3501 # # resp-text-code = "ALERT" / # "BADCHARSET" [SP "(" charset *(SP charset) ")" ] / # capability-data / "PARSE" / # "PERMANENTFLAGS" SP "(" # [flag-perm *(SP flag-perm)] ")" / # "READ-ONLY" / "READ-WRITE" / "TRYCREATE" / # "UIDNEXT" SP nz-number / "UIDVALIDITY" SP nz-number / # "UNSEEN" SP nz-number / # atom [SP 1*] # # +UIDPLUS+ ABNF:: https://www.rfc-editor.org/rfc/rfc4315.html#section-4 # resp-text-code =/ resp-code-apnd / resp-code-copy / "UIDNOTSTICKY" def resp_text_code token = match(T_ATOM) name = token.value.upcase case name when /\A(?:ALERT|PARSE|READ-ONLY|READ-WRITE|TRYCREATE|NOMODSEQ)\z/n result = ResponseCode.new(name, nil) when /\A(?:BADCHARSET)\z/n result = ResponseCode.new(name, charset_list) when /\A(?:CAPABILITY)\z/ni result = ResponseCode.new(name, capability__list) when /\A(?:PERMANENTFLAGS)\z/n match(T_SPACE) result = ResponseCode.new(name, flag_list) when /\A(?:UIDVALIDITY|UIDNEXT|UNSEEN)\z/n match(T_SPACE) result = ResponseCode.new(name, number) when /\A(?:APPENDUID)\z/n result = ResponseCode.new(name, resp_code_apnd__data) when /\A(?:COPYUID)\z/n result = ResponseCode.new(name, resp_code_copy__data) else token = lookahead if token.symbol == T_SPACE shift_token result = ResponseCode.new(name, text_chars_except_rbra) else result = ResponseCode.new(name, nil) end end return result end # 1* def text_chars_except_rbra match_re(CTEXT_REGEXP, '1*')[0] end def charset_list result = [] if accept(T_SPACE) match(T_LPAR) result << charset while accept(T_SPACE) result << charset end match(T_RPAR) end result end # already matched: "APPENDUID" # # +UIDPLUS+ ABNF:: https://www.rfc-editor.org/rfc/rfc4315.html#section-4 # resp-code-apnd = "APPENDUID" SP nz-number SP append-uid # append-uid = uniqueid # append-uid =/ uid-set # ; only permitted if client uses [MULTIAPPEND] # ; to append multiple messages. # # n.b, uniqueid ⊂ uid-set. To avoid inconsistent return types, we always # match uid_set even if that returns a single-member array. # def resp_code_apnd__data match(T_SPACE); validity = number match(T_SPACE); dst_uids = uid_set # uniqueid ⊂ uid-set UIDPlusData.new(validity, nil, dst_uids) end # already matched: "COPYUID" # # resp-code-copy = "COPYUID" SP nz-number SP uid-set SP uid-set def resp_code_copy__data match(T_SPACE); validity = number match(T_SPACE); src_uids = uid_set match(T_SPACE); dst_uids = uid_set UIDPlusData.new(validity, src_uids, dst_uids) end def address_list token = lookahead if token.symbol == T_NIL shift_token return nil else result = [] match(T_LPAR) while true token = lookahead case token.symbol when T_RPAR shift_token break when T_SPACE shift_token end result.push(address) end return result end end ADDRESS_REGEXP = /\G\ (?# 1: NAME )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \ (?# 2: ROUTE )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \ (?# 3: MAILBOX )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)") \ (?# 4: HOST )(?:NIL|"((?:[^\x80-\xff\x00\r\n"\\]|\\["\\])*)")\ \)/ni def address match(T_LPAR) if @str.index(ADDRESS_REGEXP, @pos) # address does not include literal. @pos = $~.end(0) name = $1 route = $2 mailbox = $3 host = $4 for s in [name, route, mailbox, host] Patterns.unescape_quoted! s end else name = nstring match(T_SPACE) route = nstring match(T_SPACE) mailbox = nstring match(T_SPACE) host = nstring match(T_RPAR) end return Address.new(name, route, mailbox, host) end FLAG_REGEXP = /\ (?# FLAG )\\([^\x80-\xff(){ \x00-\x1f\x7f%"\\]+)|\ (?# ATOM )([^\x80-\xff(){ \x00-\x1f\x7f%*"\\]+)/n def flag_list if @str.index(/\(([^)]*)\)/ni, @pos) @pos = $~.end(0) return $1.scan(FLAG_REGEXP).collect { |flag, atom| if atom atom else flag.capitalize.intern end } else parse_error("invalid flag list") end end # See https://www.rfc-editor.org/errata/rfc3501 # # charset = atom / quoted def charset if token = accept(T_QUOTED) token.value else atom end end # RFC-4315 (UIDPLUS) or RFC9051 (IMAP4rev2): # uid-set = (uniqueid / uid-range) *("," uid-set) # uid-range = (uniqueid ":" uniqueid) # ; two uniqueid values and all values # ; between these two regardless of order. # ; Example: 2:4 and 4:2 are equivalent. # uniqueid = nz-number # ; Strictly ascending def uid_set token = match(T_NUMBER, T_ATOM) case token.symbol when T_NUMBER then [Integer(token.value)] when T_ATOM token.value.split(",").flat_map {|range| range = range.split(":").map {|uniqueid| Integer(uniqueid) } range.size == 1 ? range : Range.new(range.min, range.max).to_a } end end def nil_atom match(T_NIL) return nil end SPACES_REGEXP = /\G */n # The RFC is very strict about this and usually we should be too. # But skipping spaces is usually a safe workaround for buggy servers. # # This advances @pos directly so it's safe before changing @lex_state. def accept_spaces shift_token if @token&.symbol == T_SPACE if @str.index(SPACES_REGEXP, @pos) @pos = $~.end(0) end end def next_token case @lex_state when EXPR_BEG if @str.index(BEG_REGEXP, @pos) @pos = $~.end(0) if $1 return Token.new(T_SPACE, $+) elsif $2 && $6 # greedily match ATOM, prefixed with NUMBER, NIL, or PLUS. return Token.new(T_ATOM, $2) elsif $3 return Token.new(T_NIL, $+) elsif $4 return Token.new(T_NUMBER, $+) elsif $5 return Token.new(T_PLUS, $+) elsif $7 # match ATOM, without a NUMBER, NIL, or PLUS prefix return Token.new(T_ATOM, $+) elsif $8 return Token.new(T_QUOTED, Patterns.unescape_quoted($+)) elsif $9 return Token.new(T_LPAR, $+) elsif $10 return Token.new(T_RPAR, $+) elsif $11 return Token.new(T_BSLASH, $+) elsif $12 return Token.new(T_STAR, $+) elsif $13 return Token.new(T_LBRA, $+) elsif $14 return Token.new(T_RBRA, $+) elsif $15 len = $+.to_i val = @str[@pos, len] @pos += len return Token.new(T_LITERAL, val) elsif $16 return Token.new(T_PERCENT, $+) elsif $17 return Token.new(T_CRLF, $+) elsif $18 return Token.new(T_EOF, $+) else parse_error("[Net::IMAP BUG] BEG_REGEXP is invalid") end else @str.index(/\S*/n, @pos) parse_error("unknown token - %s", $&.dump) end when EXPR_DATA if @str.index(DATA_REGEXP, @pos) @pos = $~.end(0) if $1 return Token.new(T_SPACE, $+) elsif $2 return Token.new(T_NIL, $+) elsif $3 return Token.new(T_NUMBER, $+) elsif $4 return Token.new(T_QUOTED, Patterns.unescape_quoted($+)) elsif $5 len = $+.to_i val = @str[@pos, len] @pos += len return Token.new(T_LITERAL, val) elsif $6 return Token.new(T_LPAR, $+) elsif $7 return Token.new(T_RPAR, $+) else parse_error("[Net::IMAP BUG] DATA_REGEXP is invalid") end else @str.index(/\S*/n, @pos) parse_error("unknown token - %s", $&.dump) end else parse_error("invalid @lex_state - %s", @lex_state.inspect) end end end end end