lib/sup/index.rb in sup-0.0.1 vs lib/sup/index.rb in sup-0.0.2

- old
+ new

@@ -16,21 +16,15 @@ end class Index include Singleton - LOAD_THREAD_PETIT_DELAY = 0.1 - LOAD_THREAD_GRAND_DELAY = 5 - - MESSAGES_AT_A_TIME = 10 - attr_reader :index # debugging only def initialize dir=BASE_DIR @dir = dir @mutex = Mutex.new - @load_thread = nil # loads new messages @sources = {} @sources_dirty = false self.class.i_am_the_instance self end @@ -48,11 +42,12 @@ def add_source source raise "duplicate source!" if @sources.include? source @sources_dirty = true source.id ||= @sources.size - source.id += 1 while @sources.member? source.id + ##TODO: why was this necessary? + ##source.id += 1 while @sources.member? source.id @sources[source.id] = source end def source_for name; @sources.values.find { |s| s.is_source_for? name }; end def usual_sources; @sources.values.find_all { |s| s.usual? }; end @@ -69,11 +64,11 @@ else Redwood::log "creating index" field_infos = Ferret::Index::FieldInfos.new :store => :yes field_infos.add_field :message_id field_infos.add_field :source_id - field_infos.add_field :source_info, :index => :no, :term_vector => :no + field_infos.add_field :source_info field_infos.add_field :date, :index => :untokenized field_infos.add_field :body, :store => :no field_infos.add_field :label field_infos.add_field :subject field_infos.add_field :from @@ -188,35 +183,54 @@ def build_message docid doc = @index[docid] source = @sources[doc[:source_id].to_i] #puts "building message #{doc[:message_id]} (#{source}##{doc[:source_info]})" raise "invalid source #{doc[:source_id]}" unless source - begin - raise "no snippet" unless doc[:snippet] - Message.new source, doc[:source_info].to_i, - doc[:label].split(" ").map { |s| s.intern }, - doc[:snippet] - rescue MessageFormatError => e - raise IndexError.new(source, "error building message #{doc[:message_id]} at #{source}/#{doc[:source_info]}: #{e.message}") - nil - end - end - def start_load_thread - return if @load_thread - @load_thread = true - @load_thread = ::Thread.new do - while @load_thread - load_some_entries ENTRIES_AT_A_TIME, LOAD_THREAD_PETIT_DELAY, LOAD_THREAD_GRAND_DELAY + m = + if source.broken? + nil + else + begin + Message.new :source => source, :source_info => doc[:source_info].to_i, + :labels => doc[:label].split(" ").map { |s| s.intern }, + :snippet => doc[:snippet] + rescue MessageFormatError => e + raise IndexError.new(source, "error building message #{doc[:message_id]} at #{source}/#{doc[:source_info]}: #{e.message}") + rescue SourceError => e + nil + end end + + unless m + fake_header = { + "date" => Time.at(doc[:date].to_i), + "subject" => unwrap_subj(doc[:subject]), + "from" => doc[:from], + "to" => doc[:to], + "message-id" => doc[:message_id], + "references" => doc[:refs], + } + + m = Message.new :labels => doc[:label].split(" ").map { |s| s.intern }, + :snippet => doc[:snippet], :header => fake_header, + :body => <<EOS +#{doc[:snippet]}... + +An error occurred while loading this message. It is possible that the source +has changed, or (in the case of remote sources) is down. + +The error message was: + #{source.broken_msg} +EOS end + m end - def end_load_thread; @load_thread = nil; end def fresh_thread_id; @next_thread_id += 1; end - def wrap_subj subj; "__START_SUBJECT__ #{subj} __END_SUBJECT__"; end + def unwrap_subj subj; subj =~ /__START_SUBJECT__ (.*?) __END_SUBJECT__/ && $1; end def add_message m return false if contains? m source_id = @@ -236,11 +250,11 @@ :snippet => m.snippet, :label => m.labels.join(" "), :from => m.from ? m.from.email : "", :to => (m.to + m.cc + m.bcc).map { |x| x.email }.join(" "), :subject => wrap_subj(Message.normalize_subj(m.subj)), - :refs => (m.refs + m.replytos).join(" "), + :refs => (m.refs + m.replytos).uniq.join(" "), } @index.add_document d ## TODO: figure out why this is sometimes triggered @@ -314,40 +328,18 @@ @sources_dirty = false end def save_sources fn=Redwood::SOURCE_FN if @sources_dirty || @sources.any? { |id, s| s.dirty? } - FileUtils.mv fn, fn + ".bak", :force => true if File.exists? fn + bakfn = fn + ".bak" + if File.exists? fn + File.chmod 0600, fn + FileUtils.mv fn, bakfn, :force => true unless File.exists?(bakfn) && File.size(bakfn) > File.size(fn) + end Redwood::save_yaml_obj @sources.values, fn + File.chmod 0600, fn end @sources_dirty = false - end - - def load_some_entries max=ENTRIES_AT_A_TIME, delay1=nil, delay2=nil - num = 0 - begin - @sources.each_with_index do |source, source_id| - next if source.done? || num >= max - source.each do |source_info, label| - begin - m = Message.new(source, source_info, label + [:inbox]) - add_message m unless contains_id? m.id - puts m.content.inspect - num += 1 - rescue MessageFormatError => e - $stderr.puts "ignoring erroneous message at #{source}##{source_info}: #{e.message}" - end - break if num >= max - sleep delay1 if delay1 - end - Redwood::log "loaded #{num} entries from #{source}" - sleep delay2 if delay2 - end - ensure - save_sources - save_index - end - num end end end