lib/sup/mbox/loader.rb in sup-0.7 vs lib/sup/mbox/loader.rb in sup-0.8

- old
+ new

@@ -7,11 +7,11 @@
 class Loader < Source
   yaml_properties :uri, :cur_offset, :usual, :archived, :id, :labels
   attr_accessor :labels

   ## uri_or_fp is horrific. need to refactor.
-  def initialize uri_or_fp, start_offset=nil, usual=true, archived=false, id=nil, labels=[]
+  def initialize uri_or_fp, start_offset=0, usual=true, archived=false, id=nil, labels=[]
     @mutex = Mutex.new
     @labels = ((labels || []) - LabelManager::RESERVED_LABELS).uniq.freeze

     case uri_or_fp
     when String
@@ -54,49 +54,60 @@
   def load_header offset
     header = nil
     @mutex.synchronize do
       @f.seek offset
       l = @f.gets
-      unless l =~ BREAK_RE
+      unless MBox::is_break_line? l
         raise OutOfSyncSourceError, "mismatch in mbox file offset #{offset.inspect}: #{l.inspect}."
       end
-      header = MBox::read_header @f
+      header = parse_raw_email_header @f
     end
     header
   end

   def load_message offset
     @mutex.synchronize do
       @f.seek offset
       begin
-        RMail::Mailbox::MBoxReader.new(@f).each_message do |input|
-          m = RMail::Parser.read(input)
-          if m.body && m.body.is_a?(String)
-            m.body.gsub!(/^>From /, "From ")
-          end
-          return m
-        end
+        ## don't use RMail::Mailbox::MBoxReader because it doesn't properly ignore
+        ## "From" at the start of a message body line.
+        string = ""
+        l = @f.gets
+        string << l until @f.eof? || MBox::is_break_line?(l = @f.gets)
+        RMail::Parser.read string
       rescue RMail::Parser::Error => e
         raise FatalSourceError, "error parsing mbox file: #{e.message}"
       end
     end
   end

+  ## scan forward until we're at the valid start of a message
+  def correct_offset!
+    @mutex.synchronize do
+      @f.seek cur_offset
+      string = ""
+      until @f.eof? || (l = @f.gets) =~ BREAK_RE
+        string << l
+      end
+      self.cur_offset += string.length
+    end
+  end
+
   def raw_header offset
     ret = ""
     @mutex.synchronize do
       @f.seek offset
       until @f.eof? || (l = @f.gets) =~ /^\r*$/
-        ret += l
+        ret << l
       end
     end
     ret
   end

   def raw_message offset
     ret = ""
-    each_raw_message_line(offset) { |l| ret += l }
+    each_raw_message_line(offset) { |l| ret << l }
     ret
   end

   ## apparently it's a million times faster to call this directly if
   ## we're just moving messages around on disk, than reading things
@@ -106,11 +117,11 @@
   ## sup-sync-back has to do it.
   def each_raw_message_line offset
     @mutex.synchronize do
       @f.seek offset
       yield @f.gets
-      until @f.eof? || (l = @f.gets) =~ BREAK_RE
+      until @f.eof? || MBox::is_break_line?(l = @f.gets)
         yield l
       end
     end
   end

@@ -127,20 +138,20 @@
         ## 1. before a \n and a mbox separator, if it was previously at
         ## EOF and a new message was added; or,
         ## 2. at the beginning of an mbox separator (in all other
         ## cases).

-        l = @f.gets or raise "next while at EOF"
+        l = @f.gets or return nil
         if l =~ /^\s*$/ # case 1
           returned_offset = @f.tell
           @f.gets # now we're at a BREAK_RE, so skip past it
         else # case 2
           returned_offset = cur_offset
           ## we've already skipped past the BREAK_RE, so just go
         end
         while(line = @f.gets)
-          break if line =~ BREAK_RE
+          break if MBox::is_break_line? line
           next_offset = @f.tell
         end
       end
     rescue SystemCallError, IOError => e
       raise FatalSourceError, "Error reading #{@f.path}: #{e.message}"