Sha256: cd429f948175101bd83fd021bae4db8a2e2e06537199cf4f45426006def55174

Contents?: true

Size: 988 Bytes

Versions: 1

Compression:

Stored size: 988 Bytes

Contents

require 'chronicle/etl'
require 'mail'
require 'tempfile'

module Chronicle
  module Email
    # Splits an mbox file into individual raw email messages.
    #
    # The path of the mbox file is read from +@options[:filename]+.
    class MboxExtractor < Chronicle::ETL::Extractor
      # mbox format is a bunch of emails concatenated together, separated
      # by a line that starts with "From "
      NEW_EMAIL_REGEX = Regexp.new('^From [^\s]+ .{24}')

      # Counts the message separator lines in the mbox file.
      #
      # @return [Integer] number of lines matching NEW_EMAIL_REGEX
      def results_count
        # The enumerator returned by File.foreach closes the file once it has
        # been fully consumed, so no handle is left open.
        File.foreach(@options[:filename]).count { |line| line.match?(NEW_EMAIL_REGEX) }
      end

      # Streams the mbox file line by line and yields each message as a
      # single String (separator line included).
      #
      # Any non-empty text that precedes the first separator line is yielded
      # as its own chunk.
      #
      # @yieldparam email [String] raw text of one message
      # @return [void]
      def extract
        message = +''

        # The block form of File.foreach closes the file even if the
        # consumer's block raises.
        File.foreach(@options[:filename]) do |line|
          # A separator line starts a new message: hand over what has been
          # collected so far and start a fresh buffer.
          if line.match?(NEW_EMAIL_REGEX) && !message.empty?
            yield message
            message = +''
          end
          message << line
        end

        # The last message has no trailing separator, so it must be emitted
        # explicitly once the file is exhausted.
        yield message unless message.empty?
      end
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
chronicle-email-0.1.1 lib/chronicle/email/mbox_extractor.rb