lib/sup/rfc2047.rb in sup-1.0 vs lib/sup/rfc2047.rb in sup-1.1

- old
+ new

@@ -15,31 +15,32 @@
 # Copyright (c) Sam Roberts <sroberts / uniserve.com> 2004
 #
 # This file is distributed under the same terms as Ruby.
 module Rfc2047
-  WORD = %r{=\?([!\#$%&'*+-/0-9A-Z\\^\`a-z{|}~]+)\?([BbQq])\?([!->@-~]+)\?=} # :nodoc: 'stupid ruby-mode
+  WORD = %r{=\?([!\#$%&'*+-/0-9A-Z\\^\`a-z{|}~]+)\?([BbQq])\?([!->@-~ ]+)\?=} # :nodoc: 'stupid ruby-mode
   WORDSEQ = %r{(#{WORD.source})\s+(?=#{WORD.source})}
   def Rfc2047.is_encoded? s; s =~ WORD end
   # Decodes a string, +from+, containing RFC 2047 encoded words into a target
   # character set, +target+. See iconv_open(3) for information on the
   # supported target encodings. If one of the encoded words cannot be
   # converted to the target encoding, it is left in its encoded form.
   def Rfc2047.decode_to(target, from)
     from = from.gsub(WORDSEQ, '\1')
-    out = from.gsub(WORD) do
+    from.gsub(WORD) do |word|
       charset, encoding, text = $1, $2, $3
       # B64 or QP decode, as necessary:
       case encoding
         when 'b', 'B'
-          #puts text
-          text = text.unpack('m*')[0]
-          #puts text.dump
+          ## Padding is optional in RFC 2047 words. Add some extra padding
+          ## before decoding the base64, otherwise on Ruby 2.0 the final byte
+          ## might be discarded.
+          text = (text + '===').unpack('m*')[0]
         when 'q', 'Q'
           # RFC 2047 has a variant of quoted printable where a ' ' character
           # can be represented as an '_', rather than =32, so convert
           # any of these that we find before doing the QP decoding.
@@ -48,9 +49,23 @@
         # Don't need an else, because no other values can be matched in a
         # WORD.
       end
-      text.transcode(target, charset)
+      # Handle UTF-7 specially because Ruby doesn't actually support it as
+      # a normal character encoding.
+      if charset == 'UTF-7'
+        begin
+          next text.decode_utf7.encode(target)
+        rescue ArgumentError, EncodingError
+          next word
+        end
+      end
+
+      begin
+        text.force_encoding(charset).encode(target)
+      rescue ArgumentError, EncodingError
+        word
+      end
     end
   end
 end