lib/chronicle/email/imap_extractor.rb in chronicle-email-0.2.3 vs lib/chronicle/email/imap_extractor.rb in chronicle-email-0.3.0
- old
+ new
@@ -1,14 +1,16 @@
require 'net/imap'
+require 'mail'
module Chronicle
module Email
class IMAPExtractor < Chronicle::ETL::Extractor
# Connector registration for this extractor. The 0.2.3 side (`-` lines) set
# provider/description/identifier; the 0.3.0 side (`+` lines) sets
# source/type/strategy as symbols plus a capitalised description.
register_connector do |r|
- r.provider = 'email'
- r.description = 'imap server'
- r.identifier = 'imap'
+ r.source = :email
+ r.type = :message
+ r.strategy = :imap
+ r.description = 'IMAP server'
end
# Connection settings. The defaults target Gmail's IMAP endpoint
# (host imap.gmail.com, port 993, the "[Gmail]/All Mail" mailbox).
setting :host, required: true, default: 'imap.gmail.com'
setting :port, type: :numeric, required: true, default: 993
setting :mailbox, required: true, default: '[Gmail]/All Mail'
@@ -26,66 +28,73 @@
end
# Fetches every message listed in @message_ids and yields one extraction per
# message.
#
# 0.2.3 yielded the raw BODY[] string under the :email key. 0.3.0 parses that
# string with Mail.new and yields a hash with :raw (the parsed Mail object),
# :time, :subject, :from and :to; sender and recipient lists are joined with
# ', '. The `&.` calls leave :time/:from/:to as nil when the corresponding
# header accessor returns nil.
# NOTE(review): build_extraction is not defined in the visible source —
# presumably provided by Chronicle::ETL::Extractor; confirm.
def extract
@message_ids.each do |message_id|
message = fetch_message(message_id)
- yield Chronicle::ETL::Extraction.new(data: { email: message.attr["BODY[]"]} )
+ email = Mail.new(message.attr['BODY[]'])
+ data = {
+ raw: email,
+ time: email.date&.to_time,
+ subject: email.subject,
+ from: email&.from&.join(', '),
+ to: email&.to&.join(', ')
+ }
+ yield build_extraction(data:)
end
end
private
# Opens a Net::IMAP connection to @config.host/@config.port, logs in with
# @config.username/@config.password, selects @config.mailbox and returns the
# connection.
#
# Raises Chronicle::ETL::ExtractionError when the server answers with
# Net::IMAP::NoResponseError. The only change between versions is dropping the
# unused `=> e` binding and switching the message to single quotes.
# NOTE(review): the positional `true` is presumably the legacy "use SSL" flag
# of Net::IMAP.new — confirm against the net-imap version in use.
def create_connection
connection = Net::IMAP.new(@config.host, @config.port, true)
connection.login(@config.username, @config.password)
connection.select(@config.mailbox)
connection
- rescue Net::IMAP::NoResponseError => e
- raise(Chronicle::ETL::ExtractionError, "Error connecting to IMAP server. Please check username and password")
+ rescue Net::IMAP::NoResponseError
+ raise(Chronicle::ETL::ExtractionError, 'Error connecting to IMAP server. Please check username and password')
end
# Searches the selected mailbox and returns the matching message ids.
#
# Uses the Gmail search keys when gmail_mode? is true, otherwise the default
# keys. When @config.limit is set, only the first @config.limit ids are kept.
# Raises Chronicle::ETL::ExtractionError when the search triggers
# Net::IMAP::BadResponseError.
def fetch_message_ids
keys = gmail_mode? ? search_keys_gmail : search_keys_default
message_ids = @connection.search(keys)
message_ids = message_ids.first(@config.limit) if @config.limit
message_ids
- rescue Net::IMAP::BadResponseError => e
- raise(Chronicle::ETL::ExtractionError, "Error searching IMAP server for messages")
+ rescue Net::IMAP::BadResponseError
+ raise(Chronicle::ETL::ExtractionError, 'Error searching IMAP server for messages')
end
# Fetches a single message via 'BODY.PEEK[]' and returns the first element of
# the fetch response.
#
# Raises Chronicle::ETL::ExtractionError when the fetch returns a falsy
# response. The version change is stylistic only: single-quoted strings and
# an implicit return instead of `return`.
def fetch_message(message_id)
- response = @connection.fetch(message_id, "BODY.PEEK[]")
- raise(Chronicle::ETL::ExtractionError, "Error loading message") unless response
+ response = @connection.fetch(message_id, 'BODY.PEEK[]')
+ raise(Chronicle::ETL::ExtractionError, 'Error loading message') unless response
- return response[0]
+ response[0]
end
# Builds the search keys used when talking to Gmail: the pair
# ['X-GM-RAW', query], where query always starts with '-label:draft' and is
# extended with after:/before: UNIX timestamps and @config.search_query when
# those settings are present.
def search_keys_gmail
# Gmail offers an extension to IMAP that lets us use Gmail queries
- q = ""
# First, we ignore drafts because they break a lot of assumptions we
# make when processing emails (lack of timestamps, ids, etc.)
- q = "-label:draft"
+ q = '-label:draft'
# We use UNIX timestamps in Gmail filters, which let us do more precise
# since/until filtering than date-based IMAP filters
q += " after:#{@config.since.to_i}" if @config.since
q += " before:#{@config.until.to_i}" if @config.until
q += " #{@config.search_query}" if @config.search_query
- ["X-GM-RAW", q]
+ ['X-GM-RAW', q]
end
# Builds the SEARCH criteria used for generic (non-Gmail) IMAP servers.
#
# @return [Array<String>] flat list of search keys: an optional
#   'SINCE' <date> pair from @config.since followed by an optional
#   'BEFORE' <date> pair from @config.until, or ['ALL'] when neither is set
def search_keys_default
keys = []
# TODO: test out non-gmail IMAP searching (for @config.search_query)
keys += ['SINCE', Net::IMAP.format_date(@config.since)] if @config.since
keys += ['BEFORE', Net::IMAP.format_date(@config.until)] if @config.until
# Return the accumulated keys explicitly. Ending the method on the
# modifier-`if` expression made it evaluate to nil whenever @config.until
# was unset, which also discarded any SINCE criteria built above.
# An IMAP SEARCH needs at least one key, so fall back to ALL when no date
# bounds are configured.
keys.empty? ? ['ALL'] : keys
end
# True when the configured host is Gmail's IMAP endpoint, in which case the
# Gmail-specific search keys are used instead of the default ones.
def gmail_mode?
configured_host = @config.host
configured_host == 'imap.gmail.com'
end
end
end
-end
\ No newline at end of file
+end