#--
# Copyright (C) 2001, 2002, 2003, 2008 Matt Armstrong. All rights
# reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
# NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#++
# Implements the RMail::Address, RMail::Address::List, and
# RMail::Address::Parser classes. Together, these classes allow you
# to robustly parse, manipulate, and generate RFC2822 email addresses
# and address lists.
module RMail
# This class provides the following functionality:
#
# * Parses RFC2822 address lists into a list of Address
# objects (see #parse).
#
# * Formats Address objects as appropriate for insertion into email
# messages (see #format).
#
# * Allows manipulation of the various parts of the address (see
# #local=, #domain=, #display_name=, #comments=).
class Address
# Regexp source, kept as a String, presumably meant to match a run of
# RFC2822 "atext" characters.
# NOTE(review): inside the brackets, `+-?` is read as a character range
# (from "+" through "?"), not as three literal characters -- confirm
# whether that is intended. The constant is not referenced by any of
# the code visible in this file.
ATEXT = '[\w=!#$%&\'*+-?^\`{|}~]+'
# Build a new address.
#
# With no argument (or nil) every part of the address starts out nil.
# When +string+ is a String it is run through Address.parse and the
# first address found, if any, supplies the local part, domain,
# comments and display name. If nothing parses, all parts stay nil.
#
# Raises ArgumentError when +string+ is neither nil nor a String.
def initialize(string = nil)
  @local = @domain = @comments = @display_name = nil
  return if string.nil?
  raise ArgumentError unless string.is_a?(String)

  parsed = Address.parse(string)
  return if parsed.empty?

  first = parsed[0]
  @local = first.local
  @domain = first.domain
  @comments = first.comments
  @display_name = first.display_name
end
# Compare this address with another based on the email address
# portion only (any display name and comments are ignored). The
# local parts are compared first and the domains break ties; a nil
# local part or domain is treated as the empty string.
#
# If +other+ is not an RMail::Address, it is converted with its
# to_str method and parsed into an RMail::Address first. Objects
# that do not respond to to_str cannot be compared, so nil is
# returned for them. That is what Comparable expects from <=>, and it
# lets == answer false instead of raising NoMethodError.
def <=>(other)
  unless other.kind_of?(RMail::Address)
    return nil unless other.respond_to?(:to_str)
    other = RMail::Address.new(other.to_str)
  end
  cmp = (@local || '') <=> (other.local || '')
  cmp = (@domain || '') <=> (other.domain || '') if cmp == 0
  cmp
end
include Comparable
# Hash value for this address, derived from the string returned by
# #address. The display name and comments therefore play no part
# in it.
def hash
  email = address
  email.hash
end
# Strict equality on the email address portion only: true when both
# the local part and the domain are eql? to those of +other+. Display
# name and comments are ignored.
#
# Raises TypeError if +other+ is not an RMail::Address.
def eql?(other)
  raise TypeError unless other.is_a?(RMail::Address)
  return false unless @local.eql?(other.local)
  @domain.eql?(other.domain)
end
# Retrieve the local portion of the mail address. This is the
# portion that precedes the @ sign. Returns nil if no local part has
# been set.
def local
@local
end
# Set the local portion of the mail address (the part before the
# @ sign). Pass nil to clear it.
#
# Raises ArgumentError if +l+ is neither nil nor a String.
def local=(l)
  unless l.nil? || l.is_a?(String)
    raise ArgumentError
  end
  @local = l
end
# Returns the display name of this address, or nil if it has none.
# The display name is present only for "angle addr" style addresses
# such as:
#
# John Doe <jdoe@example.net>
#
# In this case, the display name will be "John Doe". In
# particular this old style address has no display name:
#
# bobs@example.net (Bob Smith)
#
# See also #display_name=, #name
def display_name
@display_name
end
# Set the display name of this address (see #display_name for what
# a display name is). Passing nil or the empty string removes it.
#
# Raises ArgumentError if +str+ is neither nil nor a String.
def display_name=(str)
  raise ArgumentError, 'not a string' unless str.nil? || str.is_a?(String)
  @display_name = str
  # An empty display name is stored as "no display name".
  @display_name = nil if str == ''
end
# Best guess at a human readable name for this address.
#
# The true display name (see #display_name) wins when there is one.
# Failing that, the last comment is used. When the address has
# neither, the result is nil.
#
# For example, this yields "Bob Smith" for the old style address
#
# bobs@example.net (Bob Smith)
#
# See also #display_name, #comments, #comments=
def name
  return @display_name if @display_name
  @comments && @comments.last
end
# Returns the comments in this address as an array of strings, or
# nil if no comments have been set.
def comments
@comments
end
# Set the comments for this address. The +comments+ argument can
# be nil, a string, or an array of strings. In every case, any
# existing comments are replaced.
#
# A String is wrapped in a one element array. An Array is stored as
# given (not copied), and the stored value is frozen.
#
# Raises TypeError for any other kind of argument.
#
# See also #comments, #name
def comments=(comments)
  case comments
  when nil, Array
    @comments = comments
  when String
    @comments = [ comments ]
  else
    # Object#type was removed in Ruby 1.9; #class names the
    # offending argument's class on every Ruby version.
    raise TypeError, "Argument to RMail::Address#comments= must be " +
      "String, Array or nil, was #{comments.class}."
  end
  @comments.freeze
end
# Retrieve the domain portion of the mail address. This is the
# portion after the @ sign. Returns nil if no domain has been set.
def domain
@domain
end
# Set the domain of this address (the part after the @ sign),
# replacing any existing one.
#
# nil or the empty string clears the domain. Any other value must be
# a String; it is stored with leading and trailing whitespace
# removed.
#
# Raises ArgumentError for a non-String, non-nil argument.
def domain=(domain)
  if domain.nil? || domain == ''
    @domain = nil
  else
    raise ArgumentError unless domain.is_a?(String)
    @domain = domain.strip
  end
end
# The bare email address: local part, "@", domain, with no display
# name, angle brackets, or comments. When there is no domain, only
# the local part is returned.
#
# The result is the raw address and is not suitable for insertion
# into an e-mail: RFC2822 quoting rules are not applied. If the
# local part would need quoting, it is returned unquoted anyway (use
# #format to get the quoted form). So this method can return strings
# such as:
#
# "address with no quoting@example.net"
#
# See also #format
def address
  return @local if @domain.nil?
  @local + '@' + @domain
end
# Return this address as a String formatted as appropriate for
# insertion into a mail message.
#
# The result is made of up to three space separated parts:
#
# * the display name, wrapped in double quotes (with " and \
#   backslash escaped) unless it consists only of atom characters
#   and spaces;
# * the address itself. The local part is quoted when it contains
#   characters outside the atom set or has a leading, trailing or
#   doubled period. A domain with the same problems is emitted as
#   a bracketed domain literal. Angle brackets are placed around
#   the address when there is a display name or a domain literal;
# * each comment, wrapped in parentheses with ( ) and \ escaped.
#
# The validity checks are anchored with \A and \z so they look at
# the whole value. With ^ and $ a value containing a newline passed
# the check as soon as one of its lines was valid, and was then
# emitted unquoted.
#
# The local part must be set; a nil local part is not supported.
def format
  display_name =
    if @display_name.nil?
      nil
    elsif @display_name =~ /\A[-\/\w=!#\$%&'*+?^`{|}~ ]+\z/
      @display_name
    else
      '"' + @display_name.gsub(/["\\]/, '\\\\\&') + '"'
    end

  local =
    if @local !~ /\A[-\w=!#\$%&'*+?^`{|}~\.\/]+\z/ ||
       @local =~ /\A\./ ||
       @local =~ /\.\z/ ||
       @local =~ /\.\./
      '"' + @local.gsub(/["\\]/, '\\\\\&') + '"'
    else
      @local
    end

  domain =
    if !@domain.nil? &&
       (@domain !~ /\A[-\w=!#\$%&'*+?^`{|}~\.\/]+\z/ ||
        @domain =~ /\A\./ ||
        @domain =~ /\.\z/ ||
        @domain =~ /\.\./)
      # Emit a domain literal. Brackets already surrounding the
      # stored domain are dropped first so they are not doubled.
      inner = if @domain =~ /\A\[(.*)\]\z/
                $1
              else
                @domain
              end
      '[' + inner.gsub(/[\[\]\\]/, '\\\\\&') + ']'
    else
      @domain
    end

  address =
    if domain.nil?
      local
    elsif !display_name.nil? || domain[-1] == ?]
      '<' + local + '@' + domain + '>'
    else
      local + '@' + domain
    end

  comments = unless @comments.nil?
    @comments.collect { |c|
      '(' + c.gsub(/[()\\]/, '\\\\\&') + ')'
    }.join(' ')
  end

  [display_name, address, comments].compact.join(' ')
end
# Addresses can be converted into strings.
alias :to_str :format
# This class provides a facility to parse a string containing one
# or more RFC2822 addresses into an array of RMail::Address
# objects. You can use it directly, but it is more conveniently
# used with the RMail::Address.parse method.
class Parser
# Create a RMail::Address::Parser object that will parse
# +string+. The string is only stored here; no parsing happens
# until #parse is called. See also the RMail::Address.parse method.
def initialize(string)
@string = string
end
# Extract the mailing addresses from the string passed to #new and
# return them as an RMail::Address::List of RMail::Address objects
# (RMail::Address::List is a subclass of Array).
#
# Malformed input does not raise an exception. Instead, the
# returned list simply does not contain the malformed addresses:
# an address that ends up without both a local part and a domain
# is dropped.
#
# The string is expected to follow RFC2822's mailbox-list grammar,
# which covers the address lists found in the To:, From:, etc.
# headers of an email.
def parse
  # Fresh parser state for this run.
  @lexemes = []
  @tokens = []
  @addresses = RMail::Address::List.new
  @errors = 0

  new_address
  get # prime @sym/@lexeme with the first token
  address_list
  reset_errors
  @addresses.delete_if { |addr| !(addr.local && addr.domain) }
end
private
# Token types. The tokenizer (get_tokenize) stores one of these in
# @sym for every token it recognises, and the parsing methods branch
# on them. @sym is nil once the end of the input has been reached.
SYM_ATOM = :atom
SYM_ATOM_NON_ASCII = :atom_non_ascii
SYM_QTEXT = :qtext
SYM_COMMA = :comma
SYM_LESS_THAN = :less_than
SYM_GREATER_THAN = :greater_than
SYM_AT_SIGN = :at_sign
SYM_PERIOD = :period
SYM_COLON = :colon
SYM_SEMI_COLON = :semi_colon
SYM_DOMAIN_LITERAL = :domain_literal
# If any errors were recorded since the last reset, throw away the
# address currently being built (the last one in @addresses) and
# clear the error count. Does nothing when there were no errors.
def reset_errors
  return unless @errors > 0
  @addresses.pop
  @errors = 0
end
# Start a new, empty address at the end of @addresses. The previous
# address is discarded first if errors were recorded while it was
# being parsed (see reset_errors).
def new_address
  reset_errors
  @addresses << Address.new
end
# Return the lexemes saved so far (see save_text) joined into one
# string, and empty the buffer.
#
# Lexemes are separated by a single space, except that no space is
# put on either side of a "." lexeme.
def get_text
  text = ''
  need_space = false
  @lexemes.each do |lexeme|
    if lexeme == '.'
      need_space = false
    else
      text << ' ' if need_space
      need_space = true
    end
    text << lexeme
  end
  @lexemes = []
  text
end
# Append the current lexeme to the buffer that get_text later
# joins and returns.
def save_text
  @lexemes.push(@lexeme)
end
# Parse this:
# address_list = ([address] SYNC ",") {[address] SYNC "," } [address] .
#
# Each round parses an optional address, skips ahead to the next
# comma (recording errors for anything skipped), consumes the comma
# and starts a new address. Parsing stops at the end of input, or
# when the token after a comma can start neither an address nor
# another comma.
def address_list
  starters = [SYM_ATOM, SYM_ATOM_NON_ASCII, SYM_QTEXT, SYM_LESS_THAN]
  loop do
    address if starters.include?(@sym)
    sync(SYM_COMMA)
    return if @sym.nil?
    expect(SYM_COMMA)
    new_address
    break unless starters.include?(@sym) || @sym == SYM_COMMA
  end
  # Optional trailing address. The loop above only ends here when
  # @sym is none of these tokens, so this check does not fire.
  address if @sym == SYM_ATOM || @sym == SYM_QTEXT || @sym == SYM_LESS_THAN
end
# Peek past a local-part or display-name: read tokens for as long
# as they are words or ".", and report the type of the first token
# that is not. Every token read, including that one, is pushed back
# so the token stream ends up exactly where it started.
def address_lookahead
  pending = []
  while [SYM_ATOM, SYM_ATOM_NON_ASCII, SYM_QTEXT, SYM_PERIOD].include?(@sym)
    pending << [@sym, @lexeme]
    get
  end
  upcoming = @sym
  # Push back in reverse reading order: the token we stopped on goes
  # first so that it is the last one to be read again.
  putback(upcoming, @lexeme)
  putback_array(pending)
  get # reload the token we were on when called
  upcoming
end
# Parse this:
# address = mailbox | group
#
# On entry we could be looking at a display-name, an angle addr, or
# a local-part, and a run of words alone does not tell them apart.
# The token that follows the words decides:
#
# words '@' -> local part of a local-part @ domain
# words '<' -> display-name of a mailbox
# words ':' -> display-name of a group
#
# address_lookahead reports that token ('@', '<' or ':', or
# something else for invalid input) without consuming anything.
def address
  upcoming = address_lookahead
  return group if upcoming == SYM_COLON
  mailbox(upcoming)
end
# Parse this:
# mailbox = angleAddr |
# word {word | "."} angleAddr |
# word {"." word} "@" domain .
#
# +lookahead+ is the value returned by address_lookahead: '@' or
# '<' for valid input, anything else otherwise.
def mailbox(lookahead)
  # Bare angle address, no display name.
  return angle_addr if @sym == SYM_LESS_THAN

  if lookahead == SYM_LESS_THAN
    # Display name (words and periods) followed by an angle address.
    display_name_word
    loop do
      case @sym
      when SYM_ATOM, SYM_ATOM_NON_ASCII, SYM_QTEXT
        display_name_word
      when SYM_PERIOD
        save_text
        get
      else
        break
      end
    end
    @addresses.last.display_name = get_text
    return angle_addr
  end

  # Plain local-part "@" domain.
  word
  while @sym == SYM_PERIOD
    save_text
    get
    word
  end
  current = @addresses.last
  current.local = get_text
  expect(SYM_AT_SIGN)
  domain
  return unless @sym == SYM_LESS_THAN

  # Workaround for invalid input: an unquoted display name that
  # itself looks like an address, followed by an angle address
  # (e.g. foo@bar <user@example.net>). What was just parsed as
  # local part and domain is really the display name, so move it
  # there and parse the angle address that follows.
  current.display_name = "#{current.local}@#{current.domain}"
  current.local = nil
  current.domain = nil
  angle_addr
end
# Parse this:
# group = word {word | "."} SYNC ":" [mailbox_list] SYNC ";"
#
# The group's own name is read and thrown away, and any comments
# collected so far on the current address are cleared. Only the
# mailboxes listed inside the group produce addresses.
def group
  word
  loop do
    case @sym
    when SYM_ATOM, SYM_QTEXT
      word
    when SYM_PERIOD
      save_text
      get
    else
      break
    end
  end
  sync(SYM_COLON)
  expect(SYM_COLON)
  get_text # throw away group name
  @addresses.last.comments = nil
  mailbox_list if [SYM_ATOM, SYM_QTEXT, SYM_COMMA, SYM_LESS_THAN].include?(@sym)
  sync(SYM_SEMI_COLON)
  expect(SYM_SEMI_COLON)
end
# Parse this:
# word = atom | atom_non_ascii | quotedString
#
# Like #word, but also accepts atoms containing non-ASCII
# characters. Used for display names only. The lexeme is saved
# and the next token fetched; any other token records an error and
# is left in place.
def display_name_word
  unless [SYM_ATOM, SYM_ATOM_NON_ASCII, SYM_QTEXT].include?(@sym)
    return error("expected word, got #{@sym.inspect}")
  end
  save_text
  get
end
# Parse this:
# word = atom | quotedString
#
# On a match the lexeme is saved and the next token fetched. Any
# other token records an error and is left in place.
def word
  unless [SYM_ATOM, SYM_QTEXT].include?(@sym)
    return error("expected word, got #{@sym.inspect}")
  end
  save_text
  get
end
# Parse a mailbox list: one mailbox, then any number of further
# mailboxes each introduced by a comma. A new address is started
# for every mailbox after the first.
def mailbox_list
  loop do
    mailbox(address_lookahead)
    break unless @sym == SYM_COMMA
    get
    new_address
  end
end
# Parse this:
# angleAddr = SYNC "<" [obsRoute] addrSpec SYNC ">"
#
# An "@" straight after the "<" marks the start of an obsolete
# route, which is parsed before the address proper.
def angle_addr
  expect(SYM_LESS_THAN)
  obs_route if @sym == SYM_AT_SIGN
  addr_spec
  expect(SYM_GREATER_THAN)
end
# Parse this:
# domain = domainLiteral | obsDomain
#
# The parsed text becomes the domain of the address currently being
# built. Any token other than a domain literal or an atom records
# an error.
def domain
  case @sym
  when SYM_DOMAIN_LITERAL
    save_text
    @addresses.last.domain = get_text
    get
  when SYM_ATOM
    obs_domain
    @addresses.last.domain = get_text
  else
    error "expected start of domain, got #{@sym.inspect}"
  end
end
# Parse this:
# addrSpec = localPart "@" domain
#
# local_part and domain store their results on the address
# currently being built; a missing "@" is recorded as an error by
# expect.
def addr_spec
local_part
expect(SYM_AT_SIGN)
domain
end
# Parse this:
# local_part = word *( "." word )
#
# The words and periods read are joined and stored as the local
# part of the address currently being built.
def local_part
  loop do
    word
    break unless @sym == SYM_PERIOD
    save_text # keep the "." itself
    get
  end
  @addresses.last.local = get_text
end
# Parse this:
# obs_domain = atom *( "." atom ) .
#
# Every atom and period read is saved for a later get_text.
def obs_domain
  loop do
    expect_save(SYM_ATOM)
    break unless @sym == SYM_PERIOD
    save_text # keep the "." itself
    get
  end
end
# Parse this:
# obs_route = obs_domain_list ":"
#
# A missing ":" after the domain list is recorded as an error by
# expect.
def obs_route
obs_domain_list
expect(SYM_COLON)
end
# Parse this:
# obs_domain_list = "@" domain *( *( "," ) "@" domain )
#
# Any number of commas may separate one "@" domain entry from the
# next.
def obs_domain_list
  loop do
    expect(SYM_AT_SIGN)
    domain
    break unless [SYM_COMMA, SYM_AT_SIGN].include?(@sym)
    get while @sym == SYM_COMMA
  end
end
# Return a token (its type +sym+ and its text +lexeme+) to the
# input stream. The most recently returned token is the first one
# the next call to get hands out.
def putback(sym, lexeme)
  @tokens << [sym, lexeme]
end
# Return a whole array of [sym, lexeme] tokens to the input stream.
# They are pushed last-to-first, so get hands them out again in
# their original order.
def putback_array(a)
  a.reverse_each do |token|
    putback(*token)
  end
end
# Advance to the next token, leaving its type in @sym and its text
# in @lexeme. Tokens handed back with putback are used up first;
# only then is a fresh token read from the string.
def get
  if @tokens.empty?
    get_tokenize
  else
    @sym, @lexeme = @tokens.pop
  end
end
# Read a single token from the front of @string.
#
# On return @sym holds the token type (one of the SYM_* constants)
# and @lexeme its text, and @string has been shortened by whatever
# was consumed. At the end of the input (@string is nil or empty)
# @sym is nil and @lexeme stays nil.
#
# Whitespace and empty quoted strings ("") are skipped. Comments
# are handed to #comment, which records them on the current
# address. Any other character that starts no known token is
# dropped and recorded as an error.
#
# For quoted strings and domain literals, @lexeme is the text with
# backslash escapes removed (quoted strings also lose their
# surrounding quotes). For every other token it is the matched
# text itself.
def get_tokenize
  @lexeme = nil
  loop do
    case @string
    when nil # the end
      @sym = nil
      break
    when "" # the end
      @sym = nil
      break
    when /\A[\r\n\t ]+/m # skip whitespace
      @string = $'
    when /\A\(/m # skip comment
      comment
    when /\A""/ # skip empty quoted text
      @string = $'
    when /\A[\w!$%&\'*+\/=?^_\`{\}|~#-]+/m
      @string = $'
      @sym = SYM_ATOM
      break
    when /\A"(.*?([^\\]|\\\\))"/m
      @string = $'
      @sym = SYM_QTEXT
      @lexeme = $1.gsub(/\\(.)/, '\1')
      break
    when /\A</
      @string = $'
      @sym = SYM_LESS_THAN
      break
    when /\A>/
      @string = $'
      @sym = SYM_GREATER_THAN
      break
    when /\A@/
      @string = $'
      @sym = SYM_AT_SIGN
      break
    when /\A,/
      @string = $'
      @sym = SYM_COMMA
      break
    when /\A:/
      @string = $'
      @sym = SYM_COLON
      break
    when /\A;/
      @string = $'
      @sym = SYM_SEMI_COLON
      break
    when /\A\./
      @string = $'
      @sym = SYM_PERIOD
      break
    when /\A(\[.*?([^\\]|\\\\)\])/m
      @string = $'
      @sym = SYM_DOMAIN_LITERAL
      # Drop unescaped whitespace, then the escaping backslashes.
      @lexeme = $1.gsub(/(^|[^\\])[\r\n\t ]+/, '\1').gsub(/\\(.)/, '\1')
      break
    when /\A[\200-\377\w!$%&\'*+\/=?^_\`{\}|~#-]+/nm
      # This is just like SYM_ATOM, but includes all characters
      # with high bits. This is so we can allow such tokens in
      # the display name portion of an address even though it
      # violates the RFCs.
      # NOTE(review): the /n flag makes this a binary regexp; check
      # how it behaves against non-ASCII UTF-8 input on Ruby >= 1.9.
      @string = $'
      @sym = SYM_ATOM_NON_ASCII
      break
    when /\A./
      @string = $' # garbage
      error('garbage character in string')
    else
      raise "internal error, @string is #{@string.inspect}"
    end
  end
  # Tokens that did not set a lexeme themselves use the matched text.
  if @sym
    @lexeme ||= $&
  end
end
# Consume one parenthesised comment from the front of @string and
# append its text to the comments of the address currently being
# built (@addresses.last).
#
# Comments may nest; +depth+ counts the open parentheses. A
# backslash followed by any character is consumed as a pair, so an
# escaped parenthesis does not affect nesting.
#
# Before the text is stored, runs of whitespace are collapsed to a
# single space, the outermost pair of parentheses is removed, and
# backslash escapes are dropped.
def comment
depth = 0
comment = ''
catch(:done) {
# Each outer pass consumes one "(" plus the plain text after it.
while @string =~ /\A(\(([^\(\)\\]|\\.)*)/m
@string = $'
comment += $1
depth += 1
# Each inner pass consumes text up to and including one ")".
while @string =~ /\A(([^\(\)\\]|\\.)*\))/m
@string = $'
comment += $1
depth -= 1
# The outermost parenthesis has been closed: the comment is over.
throw :done if depth == 0
# Still nested: take any plain text that follows the ")".
if @string =~ /\A(([^\(\)\\]|\\.)+)/
@string = $'
comment += $1
end
end
end
}
comment = comment.gsub(/[\r\n\t ]+/m, ' ').
sub(/\A\((.*)\)$/m, '\1').
gsub(/\\(.)/, '\1')
# Build a new array rather than appending in place: the comments=
# setter freezes the array it stores.
@addresses.last.comments =
(@addresses.last.comments || []) + [comment]
end
# Require the current token to be of type +token+. If it is, move
# on to the next token. Otherwise record an error and leave the
# current token where it is.
def expect(token)
  return get if @sym == token
  error("expected #{token.inspect} but got #{@sym.inspect}")
end
# Like expect, but when the current token does match +token+ its
# lexeme is first saved for a later get_text.
def expect_save(token)
  save_text if @sym == token
  expect(token)
end
# Error recovery: skip tokens until the current one is of type
# +token+ or the input is exhausted. An error is recorded for every
# token skipped.
def sync(token)
  until !@sym || @sym == token
    error "expected #{token.inspect} but got #{@sym.inspect}"
    get
  end
end
# Record a parse error by bumping the error count. The message +s+
# is currently not stored or reported anywhere. A non-zero count
# makes reset_errors discard the address being built.
def error(s)
@errors += 1
end
end
# Extract the mailing addresses found in +string+ and return them
# as an RMail::Address::List (a subclass of Array) of RMail::Address
# objects.
#
# This is a shortcut for using an RMail::Address::Parser directly:
#
# RMail::Address::Parser.new(string).parse
def self.parse(string)
  Parser.new(string).parse
end
# RMail::Address::List is an Array subclass holding RMail::Address
# objects. Each convenience method below returns a plain array with
# one entry per address in the list, in list order.
class List < Array
  # The RMail::Address#local of every element.
  def locals
    map(&:local)
  end

  # The RMail::Address#display_name of every element.
  def display_names
    map(&:display_name)
  end

  # The RMail::Address#name of every element.
  def names
    map(&:name)
  end

  # The RMail::Address#domain of every element.
  def domains
    map(&:domain)
  end

  # The RMail::Address#address of every element.
  def addresses
    map(&:address)
  end

  # The RMail::Address#format of every element.
  def format
    map(&:format)
  end
end
end
end
# When this file is run directly rather than required, parse a sample
# group address and print the resulting list.
if __FILE__ == $PROGRAM_NAME
  p RMail::Address::Parser.new('A Group:a@b.c,d@e.f;').parse
end