lib/sup/mbox/loader.rb in sup-0.0.1 vs lib/sup/mbox/loader.rb in sup-0.0.2
- old
+ new
@@ -2,115 +2,112 @@
require 'rmail'
module Redwood
module MBox
-class Error < StandardError; end
+class Loader < Source
+ attr_reader :labels
-class Loader
- attr_reader :filename
- bool_reader :usual, :archived, :read, :dirty
- attr_accessor :id, :labels
+ def initialize uri, start_offset=nil, usual=true, archived=false, id=nil
+ raise ArgumentError, "not an mbox uri" unless uri =~ %r!mbox://!
+ super
- ## end_offset is the last offsets within the file which we've read.
- ## everything after that is considered new messages that haven't
- ## been indexed.
- def initialize filename, end_offset=0, usual=true, archived=false, id=nil
- @filename = filename.gsub(%r(^mbox://), "")
- @end_offset = end_offset
- @dirty = false
- @usual = usual
- @archived = archived
- @id = id
@mutex = Mutex.new
+ @filename = uri.sub(%r!^mbox://!, "")
@f = File.open @filename
- @labels = ([
- :unread,
- archived ? nil : :inbox,
- ] +
- if File.dirname(filename) =~ /\b(var|usr|spool)\b/
- []
- else
- [File.basename(filename).intern]
- end).compact
+ ## heuristic: use the filename as a label, unless the file
+ ## has a path that probably represents an inbox.
+ @labels = [:unread]
+ @labels << File.basename(@filename).intern unless File.dirname(@filename) =~ /\b(var|usr|spool)\b/
end
- def reset!; @end_offset = 0; @dirty = true; end
- def == o; o.is_a?(Loader) && o.filename == filename; end
- def to_s; "mbox://#{@filename}"; end
+ def start_offset; 0; end
+ def end_offset; File.size @f; end
- def is_source_for? s
- @filename == s || self.to_s == s
- end
-
- def load_header offset=nil
+ def load_header offset
header = nil
@mutex.synchronize do
- @f.seek offset if offset
+ @f.seek offset
+ l = @f.gets
+ unless l =~ BREAK_RE
+ self.broken_msg = "offset mismatch in mbox file offset #{offset.inspect}: #{l.inspect}. Run 'sup-import --rebuild #{to_s}' to correct this."
+ raise SourceError, self.broken_msg
+ end
header = MBox::read_header @f
end
header
end
def load_message offset
- ret = nil
@mutex.synchronize do
@f.seek offset
- RMail::Mailbox::MBoxReader.new(@f).each_message do |input|
- return RMail::Parser.read(input)
+ begin
+ RMail::Mailbox::MBoxReader.new(@f).each_message do |input|
+ return RMail::Parser.read(input)
+ end
+ rescue RMail::Parser::Error => e
+ raise SourceError, "error parsing message with rmail: #{e.message}"
end
end
end
- ## load the full header text
- def load_header_text offset
+ def raw_header offset
ret = ""
@mutex.synchronize do
@f.seek offset
until @f.eof? || (l = @f.gets) =~ /^$/
ret += l
end
end
ret
end
+ def raw_full_message offset
+ ret = ""
+ @mutex.synchronize do
+ @f.seek offset
+ @f.gets # skip mbox header
+ until @f.eof? || (l = @f.gets) =~ BREAK_RE
+ ret += l
+ end
+ end
+ ret
+ end
+
def next
- return nil if done?
- @dirty = true
- next_end_offset = @end_offset
+ returned_offset = nil
+ next_offset = cur_offset
@mutex.synchronize do
- @f.seek @end_offset
+ @f.seek cur_offset
- @f.gets # skip the From separator
- next_end_offset = @f.tell
+ ## cur_offset could be at one of two places here:
+
+ ## 1. before a \n and a mbox separator, if it was previously at
+ ## EOF and a new message was added; or,
+ ## 2. at the beginning of an mbox separator (in all other
+ ## cases).
+
+ l = @f.gets or raise "next while at EOF"
+ if l =~ /^\s*$/ # case 1
+ returned_offset = @f.tell
+ @f.gets # now we're at a BREAK_RE, so skip past it
+ else # case 2
+ returned_offset = cur_offset
+ ## we've already skipped past the BREAK_RE, to just go
+ end
+
while(line = @f.gets)
break if line =~ BREAK_RE
- next_end_offset = @f.tell + 1
+ next_offset = @f.tell
end
end
- start_offset = @end_offset
- @end_offset = next_end_offset
-
- start_offset
+ self.cur_offset = next_offset
+ [returned_offset, labels]
end
-
- def each
- until @end_offset >= File.size(@f)
- n = self.next
- yield(n, labels) if n
- end
- end
-
- def each_header
- each { |offset, labels| yield offset, labels, load_header(offset) }
- end
-
- def done?; @end_offset >= File.size(@f); end
- def total; File.size @f; end
end
-Redwood::register_yaml(Loader, %w(filename end_offset usual archived id))
+Redwood::register_yaml(Loader, %w(uri cur_offset usual archived id))
end
end