lib/chronicle/email/imap_extractor.rb in chronicle-email-0.2.3 vs lib/chronicle/email/imap_extractor.rb in chronicle-email-0.3.0

- old
+ new

@@ -1,14 +1,16 @@ require 'net/imap' +require 'mail' module Chronicle module Email class IMAPExtractor < Chronicle::ETL::Extractor register_connector do |r| - r.provider = 'email' - r.description = 'imap server' - r.identifier = 'imap' + r.source = :email + r.type = :message + r.strategy = :imap + r.description = 'IMAP server' end setting :host, required: true, default: 'imap.gmail.com' setting :port, type: :numeric, required: true, default: 993 setting :mailbox, required: true, default: '[Gmail]/All Mail' @@ -26,66 +28,73 @@ end def extract @message_ids.each do |message_id| message = fetch_message(message_id) - yield Chronicle::ETL::Extraction.new(data: { email: message.attr["BODY[]"]} ) + email = Mail.new(message.attr['BODY[]']) + data = { + raw: email, + time: email.date&.to_time, + subject: email.subject, + from: email&.from&.join(', '), + to: email&.to&.join(', ') + } + yield build_extraction(data:) end end private def create_connection connection = Net::IMAP.new(@config.host, @config.port, true) connection.login(@config.username, @config.password) connection.select(@config.mailbox) connection - rescue Net::IMAP::NoResponseError => e - raise(Chronicle::ETL::ExtractionError, "Error connecting to IMAP server. Please check username and password") + rescue Net::IMAP::NoResponseError + raise(Chronicle::ETL::ExtractionError, 'Error connecting to IMAP server. Please check username and password') end def fetch_message_ids keys = gmail_mode? ? search_keys_gmail : search_keys_default message_ids = @connection.search(keys) message_ids = message_ids.first(@config.limit) if @config.limit message_ids - rescue Net::IMAP::BadResponseError => e - raise(Chronicle::ETL::ExtractionError, "Error searching IMAP server for messages") + rescue Net::IMAP::BadResponseError + raise(Chronicle::ETL::ExtractionError, 'Error searching IMAP server for messages') end def fetch_message(message_id) - response = @connection.fetch(message_id, "BODY.PEEK[]") - raise(Chronicle::ETL::ExtractionError, "Error loading message") unless response + response = @connection.fetch(message_id, 'BODY.PEEK[]') + raise(Chronicle::ETL::ExtractionError, 'Error loading message') unless response - return response[0] + response[0] end def search_keys_gmail # Gmail offers an extension to IMAP that lets us use gmail queries - q = "" # First, we ignore drafts beacuse they break a lot of assumptions we # make when when processing emails (lack of timestamps, ids, etc) - q = "-label:draft" + q = '-label:draft' # We use UNIX timestamps in gmail filters which let us do more precise # since/until compared with date-based imap filters q += " after:#{@config.since.to_i}" if @config.since q += " before:#{@config.until.to_i}" if @config.until q += " #{@config.search_query}" if @config.search_query - ["X-GM-RAW", q] + ['X-GM-RAW', q] end def search_keys_default keys = [] # TODO: test out non-gmail IMAP searching (for @config.search_query) keys += ['SINCE', Net::IMAP.format_date(@config.since)] if @config.since - keys += ['BEFORE', Net::IMAP.format_date(@config.until)] if @config.until + keys + ['BEFORE', Net::IMAP.format_date(@config.until)] if @config.until end def gmail_mode? @config.host == 'imap.gmail.com' end end end -end \ No newline at end of file +end