Sha256: bb9ecba12abb889d087775393ef82fec695e6d95cdc8467e8b9c6020987308fe

Contents?: true

Size: 988 Bytes

Versions: 1

Compression:

Stored size: 988 Bytes

Contents

require 'chronicle/etl'
require 'mail'
require 'tempfile'

module Chronicle
  module Email
    # Extracts raw email messages, one at a time, from a file in mbox format.
    #
    # The path of the mbox file is read from +@options[:filename]+.
    class MboxExtractor < Chronicle::Etl::Extractor
      # mbox format is a bunch of emails concatenated together, separated
      # by a line that starts with "From "
      NEW_EMAIL_REGEX = Regexp.new('^From [^\s]+ .{24}')

      # Counts the messages in the mbox file by counting separator lines.
      #
      # @return [Integer] number of lines matching NEW_EMAIL_REGEX
      def results_count
        # File.foreach closes the file once iteration finishes (or raises),
        # so no handle is leaked.
        File.foreach(@options[:filename]).count { |line| line.match?(NEW_EMAIL_REGEX) }
      end

      # Streams the mbox file line by line and yields each message as a
      # single string. A message starts at a separator line and runs up to
      # (but not including) the next separator line or the end of the file.
      # Any text preceding the first separator line is yielded as its own
      # chunk.
      #
      # @yieldparam email [String] raw text of one message, including its
      #   leading "From " separator line
      # @return [nil]
      def extract
        buffer = +''

        File.foreach(@options[:filename]) do |line|
          # A separator line closes the message accumulated so far.
          if line.match?(NEW_EMAIL_REGEX) && !buffer.empty?
            yield buffer
            # Start a fresh string rather than clearing, so a consumer that
            # kept a reference to the yielded message is not affected.
            buffer = +''
          end
          buffer << line
        end

        # The final message has no separator after it; flush it explicitly.
        yield buffer unless buffer.empty?
        nil
      end
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
chronicle-email-0.1.0 lib/chronicle/email/mbox_extractor.rb