#--
# Copyright (c) 2007 by Mike Mondragon (mikemondragon@gmail.com)
#
# Please see the LICENSE file for licensing information
#++

require 'fileutils'
require 'pathname'
require 'tmpdir'
require 'yaml'

##
# MMS2R is a library to collect media files from MMS messages. MMS messages 
# are multipart emails and cellphone carriers often inject branding into these 
# messages. MMS2R strips the advertising from an MMS leaving the actual user 
# generated media.
#
# If you encounter MMS from a carrier that contains advertising and other non-
# standard media, submit a sample to the author for inclusion in this
# project.
#
# The create method is a factory method to create MMS2R::Media .
# Custom media producers can be pushed into the factory via the
# MMS2R::CARRIER_CLASSES Hash, e.g.
#
# class MMS2R::FakeCarrier < MMS2R::Media; end
# MMS2R::CARRIER_CLASSES['mms.fakecarrier.com'] = MMS2R::FakeCarrier
# ...
# media = MMS2R::Media.create(some_tmail) #media will be a MMS2R::FakeCarrier

module MMS2R

  class MMS2R::Media
    ##
    # Mail object (TMail) that the media files were derived from.
    attr_reader :mail

    ##
    # media returns the hash of media, filled in by add_file.
    # The media hash is keyed by mimetype such as 'text/plain'
    # and the value mapped to the key is an array of the files
    # that are of that type.
    attr_reader :media

    ##
    # Carrier name handed to the constructor.  The create factory
    # passes the matching key of MMS2R::CARRIER_CLASSES, or
    # 'mms2r.media' when the sender's domain is not registered.

    attr_reader :carrier

    ##
    # Base working dir where media for a unique mms message are
    # dropped: tmp_dir joined with the sanitized message id of
    # the mail (see initialize).

    attr_reader :media_dir

    ##
    # Content types of the container parts that process replaces
    # with their child parts.

    MULTIPARTS_TO_SPLIT = [ 'multipart/related', 'multipart/alternative', 'multipart/mixed' ]

    ##
    # Factory method that creates MMS2R::Media products.
    #
    # Returns a MMS2R::Media product based on the characteristics
    # of the carrier from which the MMS originated.  
    # mail is a TMail object, logger is a Logger and can be
    # nil.

    def self.create(mail, logger=nil)
      # Factory for MMS2R::Media products.
      #
      # mail   - mail object; the domain of its first from address selects
      #          the carrier
      # logger - optional logger handed to the product, may be nil
      #
      # Returns an instance of the class registered in MMS2R::CARRIER_CLASSES
      # for the sender's domain.  When no registered carrier matches, or the
      # mail has no usable from address, a plain MMS2R::Media with the
      # carrier name 'mms2r.media' is returned.

      # Parse the sender once instead of once per registered carrier.  A
      # missing or malformed address leaves domain nil so that we fall
      # through to the default instead of raising NoMethodError.
      sender = mail.from && mail.from[0]
      found = /[^@]+@(.+)/.match(sender.to_s)
      domain = found && found[1]

      fallback = lambda { ['mms2r.media', MMS2R::Media] } # used when nothing is detected
      carrier, cls = MMS2R::CARRIER_CLASSES.detect(fallback) do |name, _klass|
        # domain names are case-insensitive
        !domain.nil? && domain.casecmp(name.to_s) == 0
      end
      cls.new(mail, carrier, logger)
    end

    ##
    # Initialize a new Media comprised of a mail, the name of its
    # carrier and a logger.  Logger is an instance attribute allowing
    # for a logging strategy per carrier type

    def initialize(mail, carrier, logger=nil)
      # Builds a Media around +mail+ for the given +carrier+ name.  The
      # optional +logger+ is kept per instance so each carrier type can
      # use its own logging strategy.

      @mail, @carrier, @logger = mail, carrier, logger
      @logger.info("#{self.class} created") unless @logger.nil?

      @media = {}
      @dir_count = 0

      # every message gets its own working directory below the shared
      # tmp dir, named after its sanitized message id
      base_dir = self.class.tmp_dir()
      @media_dir = File.join(base_dir, self.class.safe_message_id(@mail.message_id))

      # lazily computed values, pre-set to get warnings out of our hair ...
      @number = @subject = @body = @default_media = @default_text = nil

      #TODO: new should be 'create' refactor to this behavior
    end

    ##
    # Get the phone number associated with this MMS if it exists.
    # The value returned is simplistic: it is just the user name of
    # the from address before the @ symbol.  Validate the number in
    # your own application.  Most carriers use the real
    # phone number as the user name.

    def get_number
      # Returns the user name portion (everything before the '@') of the
      # first from address and caches it in @number.  Returns nil when the
      # mail has no from address or the address has no '@', rather than
      # raising NoMethodError on the failed match.
      #
      # override this method in a child if the number exists elsewhere (like Sprint)
      return @number if @number

      sender = mail.from && mail.from[0]
      found = /^([^@]+)@/.match(sender.to_s)
      @number = found && found[1]
    end

    ##
    # Filter some common place holder subjects from MMS messages and
    # return nil such that default carrier subjects can be pragmatically
    # ignored.

    def get_subject
      # Returns the subject of the mail, or nil when the subject is blank
      # or matches one of the placeholder subjects listed in the
      # *_subject.yml conf files.  A non-nil result is cached in @subject.

      return @subject if @subject # we've already done the work

      raw = @mail.subject
      return @subject ||= nil if raw.nil? || raw.strip.empty?

      # collect the subjects to ignore: first the defaults configured for
      # the superclass, then the ones configured for this class
      placeholders = []
      [sclz, clz].each do |klass|
        name = clz.yaml_file_name(klass, :subject)
        conf = File.join(self.class.conf_dir(), "#{name}")
        placeholders = placeholders + YAML::load_file(conf) if File::exist?(conf)
      end

      if placeholders.empty? || !placeholders.detect { |r| r.match(raw.strip) }
        @subject ||= raw
      else
        @subject ||= nil
      end
    end
    
    # Convenience method that returns a string including all the text of the 
    # largest text file found (see get_text). Returns nil if no body text 
    # is found.
    def get_body
      # Returns the stripped contents of the file picked by get_text, or nil
      # when the message has no text file.  A non-nil result is cached in
      # @body.
      return @body if @body

      source = get_text
      @body ||= source.nil? ? nil : IO.read(source.path).strip
    end

    # Returns a File with the most likely candidate for the user-submitted
    # media. Given that most MMS messages only have one file attached,
    # this will try to give you that file. First it looks for videos, then
    # images. It also adds singleton methods to the File object so it can
    # be used in place of a CGI upload (local_path, original_filename, size,
    # and content_type).  The largest file found in terms of bytes is returned.
    #
    # Returns nil if there are not any video or image Files found.

    def get_media
      # Asks get_attachement for the video or image candidate on first use
      # and caches a non-nil answer in @default_media.
      @default_media ||= get_attachement(%w(video image))
    end

    # Returns a File with the most likely candidate that is text, or nil
    # otherwise. It also adds singleton methods to the File object so it can
    # be used in place of a CGI upload (local_path, original_filename, size,
    # and content_type).  The largest file found in terms of bytes is returned.
    #
    # Returns nil if there are not any text Files found

    def get_text
      # Asks get_attachement for the text candidate on first use and caches
      # a non-nil answer in @default_text.
      @default_text ||= get_attachement(%w(text))
    end

    ##
    # process is a template method and collects all the media in a MMS.
    # Override helper methods to this template to clean out advertising 
    # and/or ignore media that are advertising. This method should not be 
    # overridden unless there is an extreme special case in processing the 
    # media of a MMS (like Sprint)
    #
    # Helper methods for the process template:
    # * ignore_media? -- true if the media contained in a part should be ignored.
    # * process_media -- retrieves media to temporary file, returns path to file.
    # * transform_text -- called by process_media, strips out advertising.
    # * temp_file -- creates a temporary filepath based on information from the part.
    # 
    # Block support:
    # Calling process() with a block to automatically iterate through media.
    # For example, to process and receive all media types of video, you can do:
    #   mms.process do |media_type, file|
    #     results << file if media_type =~ /video/
    #   end
    #
    # note: purge must be explicitly called to remove the media files
    #       mms2r extracts from an mms message.

    def process() # :yields: media_type, file
      # Collects every media file of the mail into the media hash.
      #
      # Container parts (see MULTIPARTS_TO_SPLIT) are replaced by their
      # children at any nesting depth; every remaining part that is not
      # rejected by ignore_media? is written out by process_media and
      # registered with add_file.
      #
      # When a block is given it is called with each mime type and the
      # array of files collected for it.  Returns the media hash when a
      # block is given, nil otherwise.
      @logger.info("#{self.class} processing") unless @logger.nil?

      # Flatten on a private queue.  Removing containers from the array
      # while iterating over it skipped the part that followed each removed
      # container, and altered the parts array owned by the mail.  Children
      # are appended to the end of the queue, which keeps the ordering the
      # in-place version produced for the parts it did visit.
      pending = @mail.multipart? ? @mail.parts.dup : [@mail]
      leaves = []
      until pending.empty?
        part = pending.shift
        if MULTIPARTS_TO_SPLIT.include?(self.class.part_type?(part))
          pending.concat(part.parts)
        else
          leaves << part
        end
      end

      # get to work
      leaves.each do |p|
        t = self.class.part_type?(p)
        unless ignore_media?(t,p)
          t,f = process_media(p)
          add_file(t,f) unless t.nil? || f.nil?
        end
      end

      # when process acts upon a block
      if block_given?
        media.each do |k, v|
          yield(k, v)
        end
      end

    end

    ##
    # Helper for process template method to determine if 
    # media contained in a part should be ignored.  Producers 
    # should override this method to return true for media such 
    # as images that are advertising, carrier logos, etc.
    # The corresponding *_ignore.yml for a given class contains
    # either a regular expression for the text types or a file
    # name for all other types.  When writing an ignore regular
    # expression assume that the text it will be evaluated against
    # has been flattened where one or more consecutive whitespace 
    # (tab, space, new lines and line feeds) characters are replaced 
    # with one space ' ' character.

    def ignore_media?(type,part)
      # Returns true when +part+, whose mime type is +type+, matches one of
      # the ignore entries configured for that type, false otherwise.
      # Entries come from the *_ignore.yml file of the superclass merged
      # with the one of this class.

      # default media to ignore are in the conf file of the superclass,
      # which is a hash of mime types as keys each to an array of
      # regular expressions
      base_name = clz.yaml_file_name(sclz, :ignore)
      base_conf = File.join(self.class.conf_dir(), "#{base_name}")
      rules = File::exist?(base_conf) ? YAML::load_file(base_conf) : nil
      rules ||= Hash.new

      # merge in the ignore hash of the specific child
      own_name = clz.yaml_file_name(clz, :ignore)
      own_conf = File.join(self.class.conf_dir(), "#{own_name}")
      if File::exist?(own_conf)
        YAML::load_file(own_conf).each do |mime, entries|
          if rules[mime]
            entries.each { |entry| rules[mime] << entry }
          else
            rules[mime] = entries
          end
        end
      end

      candidates = rules[type]
      return false if rules.empty? || candidates.nil?

      main = /^([^\/]+)\//.match(type)[1]
      textual = main.eql?('text') || type.eql?('application/smil')

      # the first entry that matches decides; text like parts are compared
      # against their whitespace-flattened body, every part against its
      # file name
      candidates.each do |rule|
        if textual
          flat = part.body.gsub(/\s+/m," ").strip
          return true if rule.match(flat)
        end
        return true if filename?(part).eql?(rule)
      end
      false
    end

    ##
    # Helper for process template method to decode the part based 
    # on its type and write its content to a temporary file.  Returns 
    # path to temporary file that holds the content.  Parts with a main
    # type of text will have their contents transformed with a call to
    # transform_text
    #
    # Producers should only override this method if the parts of
    # the MMS need special treatment besides what is expected for
    # a normal mime part (like Sprint).
    #
    # Returns a tuple of content type, file path

    def process_media(part)
      # Writes the content of +part+ to a temporary file.
      #
      # Parts with a main type of text, and application/smil parts, are run
      # through transform_text_part first; all other parts are written as
      # they come out of part.body.
      #
      # Returns a tuple of content type and file path.  The path is nil
      # when there is no content to write.

      # TMail body auto-magically decodes quoted
      # printable for text/html type.
      file = temp_file(part)
      if self.class.main_type?(part).eql?('text') ||
         self.class.part_type?(part).eql?('application/smil')
        type, content = transform_text_part(part)
      else
        type = self.class.part_type?(part)
        content = part.body
      end
      return type, nil if content.nil?

      @logger.info("#{self.class} writing file #{file}") unless @logger.nil?
      # binary mode: images and video must reach the disk byte for byte,
      # text mode may translate newlines on some platforms
      File.open(file,'wb') { |f| f.write(content) }
      return type, file
    end

    ##
    # Helper for process_media template method to transform text.
    # The regular expressions for the transform are in the
    # conf/*_transform.yml files.
    # Input is the type of text and the text to transform.

    def transform_text(type, text)
      # Applies the replacements configured for +type+ in the
      # *_transform.yml conf file of this class to +text+.
      #
      # Returns a tuple of type and text.  The text is returned untouched
      # when there is no conf file or no entry for +type+; otherwise it is
      # the stripped result of all replacements.
      f = clz.yaml_file_name(clz, :transform)
      yf = File.join(self.class.conf_dir(), "#{f}")
      return type, text unless File::exist?(yf)

      # an empty conf file loads as a non-hash value; treat it as "no rules"
      h = YAML::load_file(yf)
      a = h && h[type]
      return type, text if a.nil?

      # Character set conversion is best effort: when Iconv is unavailable
      # or rejects the text, the text is used as is.  Only ordinary errors
      # and a missing library are swallowed, so signals and exit requests
      # still propagate.
      # NOTE(review): Iconv.new takes (to, from), so this converts UTF-8 to
      # ISO-8859-1 although the original comment said "convert to UTF-8" --
      # confirm which direction is intended.
      begin
        c = Iconv.new('ISO-8859-1', 'UTF-8' )
        utf_t = c.iconv(text)
      rescue StandardError, LoadError
        utf_t = text
      end

      # 'from' is a Regexp in the conf and 'to' is the match position
      # or from is text that will be replaced with to
      a.each { |from,to| utf_t = utf_t.gsub(from,to).strip }
      return type, utf_t.strip
    end

    ##
    # Helper for process_media template method to transform text.
    # The regular expressions for the trans are in *_transform.yml
    # Input is a mail part

    def transform_text_part(part)
      # Feeds the mime type and the stripped body of +part+ to
      # transform_text and returns its tuple of type and text.
      transform_text(self.class.part_type?(part), part.body.strip)
    end

    ##
    # Helper for process template method to name a temporary
    # filepath based on information in the part.  This version
    # attempts to honor the name of the media as labeled in the part
    # header and creates a unique temporary directory for writing
    # the file so filename collision does not occur.
    # Consumers of this method expect the directory
    # structure to the file exists, if the method is overridden it
    # is mandatory that this behavior is retained.

    def temp_file(part)
      # Returns the path for the file of +part+: the base name of the name
      # reported by filename?, placed in a fresh directory obtained from
      # msg_tmp_dir.
      declared = filename?(part)
      dir = msg_tmp_dir()
      File.join(dir, File.basename(declared))
    end

    ##
    # Purges the unique MMS2R::Media.media_dir directory created 
    # for this producer and all of the media that it contains.

    def purge()
      # Removes media_dir and everything below it, after announcing it on
      # the logger when one is set.
      unless @logger.nil?
        @logger.info("#{self.class} purging #{@media_dir} and all its contents")
      end
      FileUtils.rm_rf(@media_dir)
    end

    ##
    # Helper to add a file to the media hash.

    def add_file(type, file)
      # Appends +file+ to the list kept for +type+ in the media hash,
      # starting a new list for a type that has none yet.
      (@media[type] ||= []) << file
    end

    ##
    # Helper to temp_file to create a unique temporary directory that is
    # a child of media_dir.  Each call creates a new directory named after
    # an incrementing counter.

    def msg_tmp_dir()
      # Creates and returns the next numbered directory below media_dir.
      # The counter is bumped on every call, so each call yields a new
      # directory.
      @dir_count += 1
      numbered = File.join(@media_dir, @dir_count.to_s)
      FileUtils.mkdir_p(numbered)
      numbered
    end

    ##
    # returns a filename declared for a part, or a default if its not defined

    def filename?(part)
      # Looks for a file name in the name parameter of content-type, then
      # the filename parameter of content-disposition, then the
      # content-location header.  When none is declared a name is made up
      # from the current time and the default extension of the part's type.
      declared = part.sub_header("content-type", "name")
      declared ||= part.sub_header("content-disposition", "filename")
      declared ||= (part['content-location'] && part['content-location'].body)
      declared || "#{Time.now.to_f}.#{self.class.default_ext(self.class.part_type?(part))}"
    end

    # Default location for extracted media: the system tmp dir, then the
    # value of the USER environment variable (an empty segment when it is
    # not set), then 'mms2r'.  Kept in a class variable, so the value is
    # shared by this class and its subclasses.
    @@tmp_dir = File.join(Dir.tmpdir, (ENV['USER'].nil? ? '':ENV['USER']), 'mms2r')

    ##
    # Get the temporary directory where media files are written to.

    def self.tmp_dir
      @@tmp_dir
    end

    ##
    # Set the temporary directory where media files are written to.
    # d is the new directory path; it is stored as given.
    def self.tmp_dir=(d)
      @@tmp_dir=d
    end

    # Default location of the yml conf files: the 'conf' directory two
    # levels above the directory of this source file.  Kept in a class
    # variable, so the value is shared by this class and its subclasses.
    @@conf_dir = File.join(File.dirname(__FILE__), '..', '..', 'conf')

    ##
    # Get the directory where conf files are stored.

    def self.conf_dir
      @@conf_dir
    end

    ##
    # Set the directory where conf files are stored.
    # d is the new directory path; it is stored as given.
    def self.conf_dir=(d)
      @@conf_dir=d
    end

    ##
    # Helper to create a safe directory path element based on the
    # mail message id.

    def self.safe_message_id(mid)
      # Turns a mail message id into a single, non-empty directory name.
      #
      # mid - the message id string, may be nil
      #
      # Returns +mid+ without the characters $ < > @ . / and \, or the
      # current epoch seconds as a string when +mid+ is nil or nothing is
      # left after stripping.
      return "#{Time.now.to_i}" if mid.nil?

      # path separators are dropped as well so that the result is always a
      # single path element
      safe = mid.gsub(/[$<>@.\/\\]/, "")

      # An empty name would make media_dir identical to tmp_dir, and purge
      # would then remove the whole tmp dir.
      safe.empty? ? "#{Time.now.to_i}" : safe
    end

    ##
    # Returns a default file extension based on a content type

    def self.default_ext(content_type)
      # Looks the extension up in MMS2R::EXT and falls back to the sub type
      # of +content_type+ (everything after the first '/') when the type is
      # not listed there.
      known = MMS2R::EXT[content_type]
      return known unless known.nil?
      /[^\/]+\/(.+)/.match(content_type)[1]
    end

    ##
    # Determines the mimetype of a part.  Guarantees a type is returned.

    def self.part_type?(part)
      # Returns the content type of +part+, or 'text/plain' when the part
      # does not declare one, so callers always get a type.
      declared = part.content_type
      declared.nil? ? 'text/plain' : declared
    end

    ##
    # Determines the main type of the part's mimetype

    def self.main_type?(part)
      # Returns the portion of the part's mime type in front of the first
      # '/', e.g. 'image' for 'image/jpeg'.
      mime = self.part_type?(part)
      /^([^\/]+)\//.match(mime)[1]
    end

    ##
    # Determines the sub type of the part's mimetype

    def self.sub_type?(part)
      # Returns the portion of the part's mime type behind the last '/',
      # e.g. 'jpeg' for 'image/jpeg'.
      mime = self.part_type?(part)
      /\/([^\/]+)$/.match(mime)[1]
    end

    ##
    # helper to construct a yml file name with a class
    # name based pattern, i.e. mms2r_t_mobile_media_ignore.yml
    # for yaml_file_name(MMS2R::TMobileMedia,:ignore)

    def self.yaml_file_name(clz,kind)
      # Builds "<underscored class name>_<kind>.yml".  The class name is
      # underscored like active_support's inflector does: '::' becomes '_',
      # an underscore is put in front of each capitalized word, and the
      # result is downcased.
      underscored = clz.name.gsub(/::/, '_')
      underscored = underscored.gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
      underscored = underscored.gsub(/([a-z])([A-Z])/,'\1_\2')
      "#{underscored.tr("-", "_").downcase}_#{kind.to_s}.yml"
    end

    ##
    # helper to fetch self.class quickly

    def clz
      # shorthand for the class of the receiver; the conf file helpers use
      # it to build the yml file name of the concrete class
      self.class
    end

    ##
    # helper to fetch self.class.superclass quickly

    def sclz
      # shorthand for the superclass of the receiver's class; the conf file
      # helpers use it to find the yml files holding the inherited defaults
      self.class.superclass
    end

    private

    ##
    # used by get_media and get_text to return the biggest attachment type
    # listed in the types array

    def get_attachement(types)
      # Picks the largest collected file whose mime type starts with one of
      # the major types in +types+ (e.g. ['video', 'image']).
      #
      # Returns an open File carrying the extra methods local_path,
      # original_filename, size and content_type, or nil when there is no
      # file of the requested types or all of them are empty.

      # get all the files that are of the major types passed in
      files = []
      types.each do |t|
        media.keys.each do |k|
          files.concat(media[k]) if /^#{t}\//.match(k)
        end
      end
      return nil if files.empty?

      # Find the largest file by path first and open only the winner, so no
      # file handle is left open for a candidate that lost.  On equal sizes
      # the earlier candidate is kept.
      largest = nil
      largest_size = 0
      files.each do |f|
        bytes = File.size(f)
        if bytes > largest_size
          largest = f
          largest_size = bytes
        end
      end

      # Every candidate was empty.  Return before the singleton methods
      # below are defined, otherwise they would end up on nil.
      return nil if largest.nil?

      # media is hash of types to arrays of file names, the key of the
      # entry that lists the winner is its mime type
      # i.e. {'text/foo' => ['/foo/bar.txt', '/hello/world.txt']}
      owner = media.detect { |_type, paths| paths.include?(largest) }
      mime_type = owner && owner[0]

      file = File.new(largest)

      # These singleton methods implement the interface necessary to be used
      # as a drop-in replacement for files uploaded with CGI.rb.
      # This helps if you want to use the files with, for example,
      # attachment_fu.
      def file.local_path
        self.path
      end

      def file.original_filename
        File.basename(self.path)
      end

      def file.size
        File.size(self.path)
      end

      # this one is kind of confusing because it needs a closure.
      class << file
        self
      end.send(:define_method, :content_type) { mime_type }

      file
    end

  end

end