lib/sup/mbox.rb in sup-0.11 vs lib/sup/mbox.rb in sup-0.12
- old
+ new
@@ -1,15 +1,157 @@
-require "sup/mbox/loader"
-require "sup/mbox/ssh-file"
-require "sup/mbox/ssh-loader"
+require 'uri'
+require 'set'
module Redwood
-module MBox
+class MBox < Source
BREAK_RE = /^From \S+ (.+)$/
- def is_break_line? l
+ include SerializeLabelsNicely
+ yaml_properties :uri, :usual, :archived, :id, :labels
+
+ attr_reader :labels
+
+ ## uri_or_fp is horrific. need to refactor.
+ def initialize uri_or_fp, usual=true, archived=false, id=nil, labels=nil
+ @mutex = Mutex.new
+ @labels = Set.new((labels || []) - LabelManager::RESERVED_LABELS)
+
+ case uri_or_fp
+ when String
+ uri = URI(Source.expand_filesystem_uri(uri_or_fp))
+ raise ArgumentError, "not an mbox uri" unless uri.scheme == "mbox"
+ raise ArgumentError, "mbox URI ('#{uri}') cannot have a host: #{uri.host}" if uri.host
+ raise ArgumentError, "mbox URI must have a path component" unless uri.path
+ @f = File.open uri.path, 'rb'
+ @path = uri.path
+ else
+ @f = uri_or_fp
+ @path = uri_or_fp.path
+ end
+
+ super uri_or_fp, usual, archived, id
+ end
+
+ def file_path; @path end
+ def is_source_for? uri; super || (self.uri.is_a?(String) && (URI(Source.expand_filesystem_uri(uri)) == URI(Source.expand_filesystem_uri(self.uri)))) end
+
+ def self.suggest_labels_for path
+ ## heuristic: use the filename as a label, unless the file
+ ## has a path that probably represents an inbox.
+ if File.dirname(path) =~ /\b(var|usr|spool)\b/
+ []
+ else
+ [File.basename(path).downcase.intern]
+ end
+ end
+
+ def load_header offset
+ header = nil
+ @mutex.synchronize do
+ @f.seek offset
+ header = parse_raw_email_header @f
+ end
+ header
+ end
+
+ def load_message offset
+ @mutex.synchronize do
+ @f.seek offset
+ begin
+ ## don't use RMail::Mailbox::MBoxReader because it doesn't properly ignore
+ ## "From" at the start of a message body line.
+ string = ""
+ until @f.eof? || MBox::is_break_line?(l = @f.gets)
+ string << l
+ end
+ RMail::Parser.read string
+ rescue RMail::Parser::Error => e
+ raise FatalSourceError, "error parsing mbox file: #{e.message}"
+ end
+ end
+ end
+
+ def raw_header offset
+ ret = ""
+ @mutex.synchronize do
+ @f.seek offset
+ until @f.eof? || (l = @f.gets) =~ /^\r*$/
+ ret << l
+ end
+ end
+ ret
+ end
+
+ def raw_message offset
+ ret = ""
+ each_raw_message_line(offset) { |l| ret << l }
+ ret
+ end
+
+ def store_message date, from_email, &block
+ need_blank = File.exists?(@path) && !File.zero?(@path)
+ File.open(@path, "ab") do |f|
+ f.puts if need_blank
+ f.puts "From #{from_email} #{date.asctime}"
+ yield f
+ end
+ end
+
+ ## apparently it's a million times faster to call this directly if
+ ## we're just moving messages around on disk, than reading things
+ ## into memory with raw_message.
+ ##
+ ## i hoped never to have to move shit around on disk but
+ ## sup-sync-back has to do it.
+ def each_raw_message_line offset
+ @mutex.synchronize do
+ @f.seek offset
+ until @f.eof? || MBox::is_break_line?(l = @f.gets)
+ yield l
+ end
+ end
+ end
+
+ def default_labels
+ [:inbox, :unread]
+ end
+
+ def poll
+ first_offset = first_new_message
+ offset = first_offset
+ end_offset = File.size @f
+ while offset and offset < end_offset
+ yield :add,
+ :info => offset,
+ :labels => (labels + default_labels),
+ :progress => (offset - first_offset).to_f/end_offset
+ offset = next_offset offset
+ end
+ end
+
+ def next_offset offset
+ @mutex.synchronize do
+ @f.seek offset
+ nil while line = @f.gets and not MBox::is_break_line? line
+ offset = @f.tell
+ offset != File.size(@f) ? offset : nil
+ end
+ end
+
+ ## TODO optimize this by iterating over allterms list backwards or
+ ## storing source_info negated
+ def last_indexed_message
+ benchmark(:mbox_read_index) { Enumerator.new(Index.instance, :each_source_info, self.id).map(&:to_i).max }
+ end
+
+ ## offset of first new message or nil
+ def first_new_message
+ next_offset(last_indexed_message || 0)
+ end
+
+ def self.is_break_line? l
l =~ BREAK_RE or return false
time = $1
begin
## hack -- make Time.parse fail when trying to substitute values from Time.now
Time.parse time, 0
@@ -17,8 +159,11 @@
rescue NoMethodError, ArgumentError
warn "found invalid date in potential mbox split line, not splitting: #{l.inspect}"
false
end
end
- module_function :is_break_line?
+
+ class Loader < self
+ yaml_properties :uri, :usual, :archived, :id, :labels
+ end
end
end