lib/sup/thread.rb in sup-0.0.6 vs lib/sup/thread.rb in sup-0.0.7

- old
+ new

@@ -1,7 +1,27 @@ -require 'date' +## Herein is all the code responsible for threading messages. I use an +## online version of the JWZ threading algorithm: +## http://www.jwz.org/doc/threading.html +## +## I certainly didn't implement it for efficiency, but thanks to our +## search engine backend, it's typically not applied to very many +## messages at once. +## At the top level, we have a ThreadSet. A ThreadSet represents a set +## of threads, e.g. a message folder or an inbox. Each ThreadSet +## contains zero or more Threads. A Thread represents all the messages +## related to a particular subject. Each Thread has one or more +## Containers. A Container is a recursive structure that holds the +## tree structure as determined by the references: and in-reply-to: +## headers. A Thread with multiple Containers occurs if they have the +## same subject, but (most likely due to someone using a primitive +## MUA) we don't have evidence from in-reply-to: or references: +## headers, only subject: (and thus our tree is probably broken). A +## Container holds zero or one message. In the case of no message, it +## means we've seen a reference to the message but haven't seen the +## message itself (yet). + module Redwood class Thread include Enumerable @@ -28,13 +48,12 @@ puts "=== start thread #{self} with #{@containers.length} trees ===" @containers.each { |c| c.dump_recursive } puts "=== end thread ===" end - ## yields each message, its depth, and its parent - ## note that the message can be a Message object, or :fake_root, - ## or nil. + ## yields each message, its depth, and its parent. note that the + ## message can be a Message object, or :fake_root, or nil. def each fake_root=false adj = 0 root = @containers.find_all { |c| !Message.subj_is_reply?(c) }.argmin { |c| c.date } if root @@ -57,19 +76,18 @@ fake_root && c.message.nil? && root.nil? && c == fud end end end + def first; each { |m, *o| return m if m }; nil; end def dirty?; any? { |m, *o| m && m.dirty? 
}; end def date; map { |m, *o| m.date if m }.compact.max; end def snippet; argfind { |m, *o| m && m.snippet }; end def authors; map { |m, *o| m.from if m }.compact.uniq; end def apply_label t; each { |m, *o| m && m.add_label(t) }; end - def remove_label t - each { |m, *o| m && m.remove_label(t) } - end + def remove_label t; each { |m, *o| m && m.remove_label(t) }; end def toggle_label label if has_label? label remove_label label return false @@ -80,11 +98,10 @@ end def set_labels l; each { |m, *o| m && m.labels = l }; end def has_label? t; any? { |m, *o| m && m.has_label?(t) }; end - def dirty?; any? { |m, *o| m && m.dirty? }; end def save index; each { |m, *o| m && m.save(index) }; end def direct_participants map { |m, *o| [m.from] + m.to if m }.flatten.compact.uniq end @@ -218,17 +235,17 @@ def contains_id? id; @messages.member?(id) && !@messages[id].empty?; end def thread_for m (c = @messages[m.id]) && c.root.thread end - def delete_empties - @subj_thread.each { |k, v| @subj_thread.delete(k) if v.empty? } + def delete_cruft + @subj_thread.each { |k, v| @subj_thread.delete(k) if v.empty? || v.subj != k } end - private :delete_empties + private :delete_cruft - def threads; delete_empties; @subj_thread.values; end - def size; delete_empties; @subj_thread.size; end + def threads; delete_cruft; @subj_thread.values; end + def size; delete_cruft; @subj_thread.size; end def dump @subj_thread.each do |s, t| puts "**********************" puts "** for subject #{s} **" @@ -271,23 +288,29 @@ break if size >= num next if contains_id? mid m = builder.call add_message m - load_thread_for_message m + load_thread_for_message m, :load_killed => opts[:load_killed] yield @subj_thread.size if block_given? 
end end ## loads in all messages needed to thread m - def load_thread_for_message m - @index.each_message_in_thread_for m, :limit => 100 do |mid, builder| + def load_thread_for_message m, opts={} + @index.each_message_in_thread_for m, opts.merge({:limit => 100}) do |mid, builder| next if contains_id? mid add_message builder.call end end + ## merges in a pre-loaded thread + def add_thread t + raise "duplicate" if @subj_thread.values.member? t + t.each { |m, *o| add_message m } + end + def is_relevant? m m.refs.any? { |ref_id| @messages[ref_id] } end ## an "online" version of the jwz threading algorithm. @@ -321,41 +344,71 @@ # puts "> have #{el}, root #{root}, oldroot #{oldroot}" # el.dump_recursive if root == oldroot if oldroot.thread - # puts "*** root (#{root.subj}) == oldroot (#{oldroot.subj}); ignoring" + ## check to see if the subject is still the same (in the case + ## that we first added a child message with a different + ## subject) + + ## this code is duplicated below. sorry! TODO: refactor + s = Message.normalize_subj(root.subj) + unless @subj_thread[s] == root.thread + ## Redwood::log "[1] moving thread to new subject #{root.subj}" + if @subj_thread[s] + @subj_thread[s] << root + root.thread = @subj_thread[s] + else + @subj_thread[s] = root.thread + end + end + else ## to disable subject grouping, use the next line instead ## (and the same for below) - #Redwood::log "[1] normalized subject for #{id} is #{Message.normalize_subj(root.subj)}" + #Redwood::log "[1] for #{root}, subject #{Message.normalize_subj(root.subj)} has #{@subj_thread[Message.normalize_subj(root.subj)] ? 'a' : 'no'} thread" thread = (@subj_thread[Message.normalize_subj(root.subj)] ||= Thread.new) #thread = (@subj_thread[root.id] ||= Thread.new) thread << root root.thread = thread - # puts "# (1) added #{root} to #{thread}" + # Redwood::log "[1] added #{root} to #{thread}" end else if oldroot.thread ## new root. 
need to drop old one and put this one in its place - # puts "*** DROPPING #{oldroot} from #{oldroot.thread}" oldroot.thread.drop oldroot oldroot.thread = nil end if root.thread - # puts "*** IGNORING cuz root already has a thread" + ## check to see if the subject is still the same (in the case + ## that we first added a child message with a different + ## subject) + s = Message.normalize_subj(root.subj) + unless @subj_thread[s] == root.thread + # Redwood::log "[2] moving thread to new subject #{root.subj}" + if @subj_thread[s] + @subj_thread[s] << root + root.thread = @subj_thread[s] + else + @subj_thread[s] = root.thread + end + end + else ## to disable subject grouping, use the next line instead ## (and the same above) - #Redwood::log "[2] normalized subject for #{id} is #{Message.normalize_subj(root.subj)}" + + ## this code is duplicated above. sorry! TODO: refactor + # Redwood::log "[2] for #{root}, subject '#{Message.normalize_subj(root.subj)}' has #{@subj_thread[Message.normalize_subj(root.subj)] ? 'a' : 'no'} thread" + thread = (@subj_thread[Message.normalize_subj(root.subj)] ||= Thread.new) #thread = (@subj_thread[root.id] ||= Thread.new) thread << root root.thread = thread - # puts "# (2) added #{root} to #{thread}" + # Redwood::log "[2] added #{root} to #{thread}" end end ## last bit @num_messages += 1