Sha256: 026074255b85db6866644611106f630d2de9695dc5eedbd999f0d1bbe38cbe7a

Contents?: true

Size: 1.66 KB

Versions: 1

Compression:

Stored size: 1.66 KB

Contents

# Scrapes selected header fields from every message in an mbox file and
# writes them to a tab-separated output file (one header row, then the rows
# produced for each message).
class MboxHeaderScraper::Scraper
  # rubocop:disable Metrics/AbcSize

  # Splits +in_file+ into individual messages (a new message starts at every
  # line beginning with "From ") and writes the TSV result to +out_file+.
  #
  # An empty +in_file+ yields an output file that contains only the header row.
  #
  # @param in_file [String] path of the mbox file to read
  # @param out_file [String] path the TSV result is copied to
  # @param options [Hash] header name => flag; only entries whose flag is
  #   exactly +true+ appear in the header row. The hash is also forwarded
  #   unchanged to the per-message conversion.
  def self.process(in_file, out_file, options = { Subject: true, Date: true, From: true, To: true, CC: true })
    Tempfile.open('mbox_header_scraper_result') do |result_file|
      result_file.write(insert_header(options))

      # Tempfile holding the message currently being collected; nil while no
      # message is open. Tracking "no message yet" with plain nil avoids
      # defining helper methods on nil (which would leak into NilClass).
      tmp = nil

      begin
        IO.foreach(in_file) do |line|
          # Round-trip through UTF-16BE so that bytes which are not valid
          # UTF-8 get replaced with '?'.
          enc_line = line.encode('UTF-16BE', 'UTF-8',
                                 invalid: :replace, undef: :replace, replace: '?').encode('UTF-8')

          # A "From " line terminates the message collected so far.
          if tmp && /^From / =~ enc_line
            append_mail(tmp, result_file, options)
            tmp = nil
          end

          tmp ||= Tempfile.open('mbox_header_scraper_tmp')
          tmp.write(enc_line)
        end

        # Flush the last message; skipped when the input had no lines at all.
        if tmp
          append_mail(tmp, result_file, options)
          tmp = nil
        end
      ensure
        # Only reached with a live tempfile when something above raised.
        tmp.close! if tmp
      end

      result_file.close(false)
      FileUtils.copy_file(result_file.path, out_file)
    end
  end
  # rubocop:enable Metrics/AbcSize

  # @param filename [String] path of the input file
  # @return [String, nil] an error message when the file is missing, else nil
  def self.check_in_file(filename)
    return 'file does not exists.' unless File.exist?(filename)
    nil
  end

  # @param filename [String] path of the output file
  # @return [String, nil] an error message when the file already exists, else nil
  def self.check_out_file(filename)
    return 'file already exists.' if File.exist?(filename)
    nil
  end

  # Wraps +mail_file+ in a MboxHeaderScraper::Mail and returns the result of
  # its +header_to_tsv+ for the given +options+.
  def self.single_mail_to_tsv(mail_file, options)
    mail = MboxHeaderScraper::Mail.new(mail_file)
    mail.header_to_tsv(options)
  end

  # Builds the TSV header row.
  #
  # @param headers [Hash] header name => flag
  # @return [String] names whose flag is exactly +true+, in hash order,
  #   joined by tabs and terminated by a newline
  def self.insert_header(headers)
    # Strict comparison on purpose: truthy values other than true are skipped.
    enabled = headers.select { |_name, flag| flag == true }.keys

    "#{enabled.join("\t")}\n"
  end

  # Closes one collected message, appends its TSV output to +result_file+
  # and removes the message's tempfile.
  def self.append_mail(mail_file, result_file, options)
    mail_file.close(false)
    result_file.write(single_mail_to_tsv(mail_file, options))
    mail_file.delete
  end
  private_class_method :append_mail
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
mbox_header_scraper-0.1.1 lib/mbox_header_scraper/scraper.rb