lib/sup/index.rb in sup-0.0.1 vs lib/sup/index.rb in sup-0.0.2
- old
+ new
@@ -16,21 +16,15 @@
end
class Index
include Singleton
- LOAD_THREAD_PETIT_DELAY = 0.1
- LOAD_THREAD_GRAND_DELAY = 5
-
- MESSAGES_AT_A_TIME = 10
-
attr_reader :index # debugging only
# Creates the index object for the directory +dir+ (defaults to BASE_DIR).
# Stores the directory, creates a Mutex, starts with an empty @sources table
# that is marked clean, and hands itself to the class-level
# i_am_the_instance hook.
# The "-" line below is the @load_thread slot that exists only in the old
# (sup-0.0.1) side of this diff.
def initialize dir=BASE_DIR
@dir = dir
@mutex = Mutex.new
- @load_thread = nil # loads new messages
@sources = {}
@sources_dirty = false
self.class.i_am_the_instance self
end
@@ -48,11 +42,12 @@
# Registers +source+ in @sources under its id and marks the source list dirty.
# A source whose id is still nil/false is assigned the current number of
# registered sources as its id.
# Raises RuntimeError ("duplicate source!") when @sources.include? source is
# true.
# NOTE(review): @sources is keyed by source.id (see the assignment at the end
# of this method), and Hash#include? tests keys, so the duplicate check
# compares +source+ against ids rather than against the stored sources --
# confirm this is the intended test.
# NOTE(review): in the new ("+") side the id-bumping loop is commented out, so
# a source whose id is already present replaces the existing entry.
def add_source source
raise "duplicate source!" if @sources.include? source
@sources_dirty = true
source.id ||= @sources.size
- source.id += 1 while @sources.member? source.id
+ ##TODO: why was this necessary?
+ ##source.id += 1 while @sources.member? source.id
@sources[source.id] = source
end
# Returns the first registered source whose is_source_for? accepts +name+,
# or nil when no registered source does.
def source_for name
  @sources.values.each do |candidate|
    return candidate if candidate.is_source_for? name
  end
  nil
end
# Returns an Array of every registered source that answers true to usual?.
def usual_sources
  registered = @sources.values
  registered.select { |src| src.usual? }
end
@@ -69,11 +64,11 @@
else
Redwood::log "creating index"
field_infos = Ferret::Index::FieldInfos.new :store => :yes
field_infos.add_field :message_id
field_infos.add_field :source_id
- field_infos.add_field :source_info, :index => :no, :term_vector => :no
+ field_infos.add_field :source_info
field_infos.add_field :date, :index => :untokenized
field_infos.add_field :body, :store => :no
field_infos.add_field :label
field_infos.add_field :subject
field_infos.add_field :from
@@ -188,35 +183,54 @@
# Builds a Message for the index document +docid+.
# The document is read from @index and its source is looked up in @sources by
# the stored :source_id; a RuntimeError ("invalid source ...") is raised when
# no such source is registered.
#
# New side ("+" lines): when the source answers true to broken?, or
# Message.new raises SourceError, a placeholder Message is built from the
# fields stored in the index document (date, subject, from, to, message id,
# refs, labels, snippet) with a body that quotes the snippet and
# source.broken_msg. A MessageFormatError is re-raised as IndexError.
#
# Old side ("-" lines): raised "no snippet" when the document had no snippet
# and built the Message from positional arguments; the lines from
# "- def start_load_thread" onward are the removed start_load_thread method,
# whose closing "end"s are shared with the new build_message as context lines.
def build_message docid
doc = @index[docid]
source = @sources[doc[:source_id].to_i]
#puts "building message #{doc[:message_id]} (#{source}##{doc[:source_info]})"
raise "invalid source #{doc[:source_id]}" unless source
- begin
- raise "no snippet" unless doc[:snippet]
- Message.new source, doc[:source_info].to_i,
- doc[:label].split(" ").map { |s| s.intern },
- doc[:snippet]
- rescue MessageFormatError => e
- raise IndexError.new(source, "error building message #{doc[:message_id]} at #{source}/#{doc[:source_info]}: #{e.message}")
- nil
- end
- end
- def start_load_thread
- return if @load_thread
- @load_thread = true
- @load_thread = ::Thread.new do
- while @load_thread
- load_some_entries ENTRIES_AT_A_TIME, LOAD_THREAD_PETIT_DELAY, LOAD_THREAD_GRAND_DELAY
+ m =
+ if source.broken?
+ nil
+ else
+ begin
+ Message.new :source => source, :source_info => doc[:source_info].to_i,
+ :labels => doc[:label].split(" ").map { |s| s.intern },
+ :snippet => doc[:snippet]
+ rescue MessageFormatError => e
+ raise IndexError.new(source, "error building message #{doc[:message_id]} at #{source}/#{doc[:source_info]}: #{e.message}")
+ rescue SourceError => e
+ nil
+ end
end
+
# m is nil here when the source was broken or raised SourceError: fall back to
# a message synthesized purely from what the index stored.
+ unless m
+ fake_header = {
+ "date" => Time.at(doc[:date].to_i),
+ "subject" => unwrap_subj(doc[:subject]),
+ "from" => doc[:from],
+ "to" => doc[:to],
+ "message-id" => doc[:message_id],
+ "references" => doc[:refs],
+ }
+
+ m = Message.new :labels => doc[:label].split(" ").map { |s| s.intern },
+ :snippet => doc[:snippet], :header => fake_header,
+ :body => <<EOS
+#{doc[:snippet]}...
+
+An error occurred while loading this message. It is possible that the source
+has changed, or (in the case of remote sources) is down.
+
+The error message was:
+ #{source.broken_msg}
+EOS
end
+ m
end
- def end_load_thread; @load_thread = nil; end
# Advances the @next_thread_id counter by one and returns the new value.
def fresh_thread_id
  @next_thread_id = @next_thread_id + 1
end
-
# Surrounds +subj+ with the __START_SUBJECT__ / __END_SUBJECT__ marker words,
# each separated from the subject text by a single space.
def wrap_subj subj
  "__START_SUBJECT__ #{subj} __END_SUBJECT__"
end
# Reverses wrap_subj: returns the text found between "__START_SUBJECT__ " and
# the first following " __END_SUBJECT__", or nil when +subj+ does not contain
# that marker pair. Added in the new ("+") side of this diff.
+ def unwrap_subj subj; subj =~ /__START_SUBJECT__ (.*?) __END_SUBJECT__/ && $1; end
def add_message m
return false if contains? m
source_id =
@@ -236,11 +250,11 @@
:snippet => m.snippet,
:label => m.labels.join(" "),
:from => m.from ? m.from.email : "",
:to => (m.to + m.cc + m.bcc).map { |x| x.email }.join(" "),
:subject => wrap_subj(Message.normalize_subj(m.subj)),
- :refs => (m.refs + m.replytos).join(" "),
+ :refs => (m.refs + m.replytos).uniq.join(" "),
}
@index.add_document d
## TODO: figure out why this is sometimes triggered
@@ -314,40 +328,18 @@
@sources_dirty = false
end
# Writes @sources.values to the file +fn+ (defaults to Redwood::SOURCE_FN)
# through Redwood::save_yaml_obj, but only when @sources_dirty is set or at
# least one source answers true to dirty?. @sources_dirty is cleared in either
# case.
#
# New side ("+" lines): an existing +fn+ is first chmod'ed to 0600 and moved to
# fn + ".bak", unless a backup already exists and is larger than the current
# file; the freshly written file is chmod'ed to 0600 as well.
# Old side ("-" lines): an existing +fn+ was always moved to fn + ".bak".
# NOTE(review): File.exists? is a deprecated alias that was removed in
# Ruby 3.2; File.exist? is the supported spelling.
#
# Everything from "- def load_some_entries" down to "- num" is the removed
# load_some_entries method from the old side; the final context "end" closes
# save_sources in the new side.
def save_sources fn=Redwood::SOURCE_FN
if @sources_dirty || @sources.any? { |id, s| s.dirty? }
- FileUtils.mv fn, fn + ".bak", :force => true if File.exists? fn
+ bakfn = fn + ".bak"
+ if File.exists? fn
+ File.chmod 0600, fn
+ FileUtils.mv fn, bakfn, :force => true unless File.exists?(bakfn) && File.size(bakfn) > File.size(fn)
+ end
Redwood::save_yaml_obj @sources.values, fn
+ File.chmod 0600, fn
end
@sources_dirty = false
- end
-
- def load_some_entries max=ENTRIES_AT_A_TIME, delay1=nil, delay2=nil
- num = 0
- begin
- @sources.each_with_index do |source, source_id|
- next if source.done? || num >= max
- source.each do |source_info, label|
- begin
- m = Message.new(source, source_info, label + [:inbox])
- add_message m unless contains_id? m.id
- puts m.content.inspect
- num += 1
- rescue MessageFormatError => e
- $stderr.puts "ignoring erroneous message at #{source}##{source_info}: #{e.message}"
- end
- break if num >= max
- sleep delay1 if delay1
- end
- Redwood::log "loaded #{num} entries from #{source}"
- sleep delay2 if delay2
- end
- ensure
- save_sources
- save_index
- end
- num
end
end
end