lib/sup/mbox/loader.rb in sup-0.7 vs lib/sup/mbox/loader.rb in sup-0.8
- old
+ new
@@ -7,11 +7,11 @@
class Loader < Source
yaml_properties :uri, :cur_offset, :usual, :archived, :id, :labels
attr_accessor :labels
## uri_or_fp is horrific. need to refactor.
- def initialize uri_or_fp, start_offset=nil, usual=true, archived=false, id=nil, labels=[]
+ def initialize uri_or_fp, start_offset=0, usual=true, archived=false, id=nil, labels=[]
@mutex = Mutex.new
@labels = ((labels || []) - LabelManager::RESERVED_LABELS).uniq.freeze
case uri_or_fp
when String
@@ -54,49 +54,60 @@
## Read and parse the header of the message that starts at +offset+.
## The line found at +offset+ must be an mbox separator; otherwise
## OutOfSyncSourceError is raised. Returns whatever the header parser
## returns.
def load_header offset
header = nil
## the seek and the reads on @f are done while holding @mutex
@mutex.synchronize do
@f.seek offset
l = @f.gets
## old side: plain BREAK_RE match; new side: MBox::is_break_line?
- unless l =~ BREAK_RE
+ unless MBox::is_break_line? l
raise OutOfSyncSourceError, "mismatch in mbox file offset #{offset.inspect}: #{l.inspect}."
end
## @f is now positioned just past the separator line, so the parser
## starts reading at the first header line
- header = MBox::read_header @f
+ header = parse_raw_email_header @f
end
header
end
## Read the message that starts at +offset+, parse it with RMail::Parser
## and return the parsed message. An RMail::Parser::Error is re-raised as
## FatalSourceError.
def load_message offset
@mutex.synchronize do
@f.seek offset
begin
## old side: let RMail's MBoxReader locate the message, then un-escape
## ">From " at the start of lines when the body is a String
- RMail::Mailbox::MBoxReader.new(@f).each_message do |input|
- m = RMail::Parser.read(input)
- if m.body && m.body.is_a?(String)
- m.body.gsub!(/^>From /, "From ")
- end
- return m
- end
+ ## don't use RMail::Mailbox::MBoxReader because it doesn't properly ignore
+ ## "From" at the start of a message body line.
+ string = ""
## the line at +offset+ is read here but never appended: the loop below
## reassigns l in its condition before the first append (presumably this
## is the separator line -- confirm against callers)
+ l = @f.gets
## gather lines up to, but not including, the next break line or EOF
+ string << l until @f.eof? || MBox::is_break_line?(l = @f.gets)
## last expression of the block, so this is the method's return value
+ RMail::Parser.read string
rescue RMail::Parser::Error => e
raise FatalSourceError, "error parsing mbox file: #{e.message}"
end
end
end
+ ## scan forward until we're at the valid start of a message
## Starting at cur_offset, gathers every line up to (not including) the
## first line matching BREAK_RE, or up to EOF, and then advances
## cur_offset by the length of the gathered text.
+ def correct_offset!
+ @mutex.synchronize do
+ @f.seek cur_offset
+ string = ""
## NOTE(review): this matches BREAK_RE directly, while the other new-side
## loops in this file call MBox::is_break_line? -- confirm the difference
## is intended
+ until @f.eof? || (l = @f.gets) =~ BREAK_RE
+ string << l
+ end
## NOTE(review): String#length counts characters on Ruby >= 1.9, whereas
## seek offsets are bytes -- confirm the skipped text is single-byte, or
## whether bytesize is needed here
+ self.cur_offset += string.length
+ end
+ end
+
## Return the raw text from +offset+ up to (not including) the first line
## that is empty apart from carriage returns, or up to EOF.
def raw_header offset
ret = ""
@mutex.synchronize do
@f.seek offset
## no line is skipped beforehand, so the line at +offset+ itself is part
## of the returned text
until @f.eof? || (l = @f.gets) =~ /^\r*$/
## new side appends in place rather than building a new string with +=
- ret += l
+ ret << l
end
end
ret
end
## Return, as one string, every line that each_raw_message_line yields
## for +offset+.
def raw_message offset
ret = ""
## new side appends in place rather than building a new string with +=
- each_raw_message_line(offset) { |l| ret += l }
+ each_raw_message_line(offset) { |l| ret << l }
ret
end
## apparently it's a million times faster to call this directly if
## we're just moving messages around on disk, than reading things
@@ -106,11 +117,11 @@
## sup-sync-back has to do it.
## Yields the raw lines of the message at +offset+: first the line at
## +offset+ unconditionally, then each following line until EOF or the
## next break line, which is not yielded. The whole iteration, including
## the caller's block, runs while holding @mutex.
def each_raw_message_line offset
@mutex.synchronize do
@f.seek offset
## the first line is yielded without any check; gets returns nil here if
## +offset+ is already at EOF
yield @f.gets
## old side: plain BREAK_RE match; new side: MBox::is_break_line?
- until @f.eof? || (l = @f.gets) =~ BREAK_RE
+ until @f.eof? || MBox::is_break_line?(l = @f.gets)
yield l
end
end
end
@@ -127,20 +138,20 @@
## 1. before a \n and a mbox separator, if it was previously at
## EOF and a new message was added; or,
## 2. at the beginning of an mbox separator (in all other
## cases).
- l = @f.gets or raise "next while at EOF"
+ l = @f.gets or return nil
if l =~ /^\s*$/ # case 1
returned_offset = @f.tell
@f.gets # now we're at a BREAK_RE, so skip past it
else # case 2
returned_offset = cur_offset
## we've already skipped past the BREAK_RE, so just go
end
while(line = @f.gets)
- break if line =~ BREAK_RE
+ break if MBox::is_break_line? line
next_offset = @f.tell
end
end
rescue SystemCallError, IOError => e
raise FatalSourceError, "Error reading #{@f.path}: #{e.message}"