lib/sup/thread.rb in sup-0.0.6 vs lib/sup/thread.rb in sup-0.0.7
- old
+ new
@@ -1,7 +1,27 @@
-require 'date'
+## Herein is all the code responsible for threading messages. I use an
+## online version of the JWZ threading algorithm:
+## http://www.jwz.org/doc/threading.html
+##
+## I certainly didn't implement it for efficiency, but thanks to our
+## search engine backend, it's typically not applied to very many
+## messages at once.
+## At the top level, we have a ThreadSet. A ThreadSet represents a set
+## of threads, e.g. a message folder or an inbox. Each ThreadSet
+## contains zero or more Threads. A Thread represents all the messages
+## related to a particular subject. Each Thread has one or more
+## Containers. A Container is a recursive structure that holds the
+## tree structure as determined by the references: and in-reply-to:
+## headers. A Thread with multiple Containers occurs if they have the
+## same subject, but (most likely due to someone using a primitive
+## MUA) we don't have evidence from in-reply-to: or references:
+## headers, only subject: (and thus our tree is probably broken). A
+## Container holds zero or one message. In the case of no message, it
+## means we've seen a reference to the message but haven't seen the
+## message itself (yet).
+
module Redwood
class Thread
include Enumerable
@@ -28,13 +48,12 @@
puts "=== start thread #{self} with #{@containers.length} trees ==="
@containers.each { |c| c.dump_recursive }
puts "=== end thread ==="
end
- ## yields each message, its depth, and its parent
- ## note that the message can be a Message object, or :fake_root,
- ## or nil.
+ ## yields each message, its depth, and its parent. note that the
+ ## message can be a Message object, or :fake_root, or nil.
def each fake_root=false
adj = 0
root = @containers.find_all { |c| !Message.subj_is_reply?(c) }.argmin { |c| c.date }
if root
@@ -57,19 +76,18 @@
fake_root && c.message.nil? && root.nil? && c == fud
end
end
end
+ def first; each { |m, *o| return m if m }; nil; end
## true if at least one message yielded by #each reports itself as
## dirty. empty slots (nil messages) count as not dirty.
def dirty?
  any? do |msg, *_rest|
    msg ? msg.dirty? : false
  end
end
## the largest date among the messages yielded by #each, or nil when
## there is no message at all. empty slots are ignored.
def date
  dates = map { |msg, *_rest| msg ? msg.date : nil }
  dates.compact.max
end
## hands each message's snippet to argfind and returns its result.
## empty slots are passed through unchanged.
## NOTE(review): argfind is defined elsewhere; presumably it returns
## the first non-false block value -- confirm.
def snippet
  argfind do |msg, *_rest|
    msg ? msg.snippet : msg
  end
end
## the distinct senders (m.from) of the messages yielded by #each, in
## first-seen order. empty slots and nil senders are dropped.
def authors
  senders = map { |msg, *_rest| msg ? msg.from : nil }
  senders.compact.uniq
end
## adds label t to every message yielded by #each, skipping empty
## slots. returns whatever #each returns.
def apply_label t
  each do |msg, *_rest|
    next msg unless msg
    msg.add_label(t)
  end
end
- def remove_label t
- each { |m, *o| m && m.remove_label(t) }
- end
+ def remove_label t; each { |m, *o| m && m.remove_label(t) }; end
def toggle_label label
if has_label? label
remove_label label
return false
@@ -80,11 +98,10 @@
end
## assigns l as the labels of every message yielded by #each, skipping
## empty slots. returns whatever #each returns.
def set_labels l
  each do |msg, *_rest|
    next msg unless msg
    msg.labels = l
  end
end
## true if at least one message yielded by #each carries label t.
## empty slots never match.
def has_label? t
  any? do |msg, *_rest|
    msg ? msg.has_label?(t) : false
  end
end
- def dirty?; any? { |m, *o| m && m.dirty? }; end
## calls save(index) on every message yielded by #each, skipping empty
## slots. returns whatever #each returns.
def save index
  each do |msg, *_rest|
    next msg unless msg
    msg.save(index)
  end
end
## the distinct people named in the from and to fields of the messages
## yielded by #each, in first-seen order. empty slots and nil entries
## are dropped.
def direct_participants
  people = map do |msg, *_rest|
    next unless msg
    [msg.from] + msg.to
  end
  people.flatten.compact.uniq
end
@@ -218,17 +235,17 @@
## true only when id is already a member of @messages and the entry
## stored under it is not empty. the entry is only looked up when the
## membership test succeeds.
def contains_id? id
  known = @messages.member?(id)
  return known unless known
  !@messages[id].empty?
end
## looks up the entry stored in @messages under m.id and returns the
## thread of that entry's root. when there is no entry, the (false or
## nil) lookup result itself is returned.
def thread_for m
  container = @messages[m.id]
  return container unless container
  container.root.thread
end
- def delete_empties
- @subj_thread.each { |k, v| @subj_thread.delete(k) if v.empty? }
+ def delete_cruft
+ @subj_thread.each { |k, v| @subj_thread.delete(k) if v.empty? || v.subj != k }
end
- private :delete_empties
+ private :delete_cruft
- def threads; delete_empties; @subj_thread.values; end
- def size; delete_empties; @subj_thread.size; end
+ def threads; delete_cruft; @subj_thread.values; end
+ def size; delete_cruft; @subj_thread.size; end
def dump
@subj_thread.each do |s, t|
puts "**********************"
puts "** for subject #{s} **"
@@ -271,23 +288,29 @@
break if size >= num
next if contains_id? mid
m = builder.call
add_message m
- load_thread_for_message m
+ load_thread_for_message m, :load_killed => opts[:load_killed]
yield @subj_thread.size if block_given?
end
end
## loads in all messages needed to thread m
- def load_thread_for_message m
- @index.each_message_in_thread_for m, :limit => 100 do |mid, builder|
+ def load_thread_for_message m, opts={}
+ @index.each_message_in_thread_for m, opts.merge({:limit => 100}) do |mid, builder|
next if contains_id? mid
add_message builder.call
end
end
+ ## merges in a pre-loaded thread
+ def add_thread t
+ raise "duplicate" if @subj_thread.values.member? t
+ t.each { |m, *o| add_message m }
+ end
+
## true if looking up any of m's refs in @messages yields a non-false
## value. stops at the first such ref.
def is_relevant? m
  m.refs.each do |ref_id|
    return true if @messages[ref_id]
  end
  false
end
## an "online" version of the jwz threading algorithm.
@@ -321,41 +344,71 @@
# puts "> have #{el}, root #{root}, oldroot #{oldroot}"
# el.dump_recursive
if root == oldroot
if oldroot.thread
- # puts "*** root (#{root.subj}) == oldroot (#{oldroot.subj}); ignoring"
+ ## check to see if the subject is still the same (in the case
+ ## that we first added a child message with a different
+ ## subject)
+
+ ## this code is duplicated below. sorry! TODO: refactor
+ s = Message.normalize_subj(root.subj)
+ unless @subj_thread[s] == root.thread
+ ## Redwood::log "[1] moving thread to new subject #{root.subj}"
+ if @subj_thread[s]
+ @subj_thread[s] << root
+ root.thread = @subj_thread[s]
+ else
+ @subj_thread[s] = root.thread
+ end
+ end
+
else
## to disable subject grouping, use the next line instead
## (and the same for below)
- #Redwood::log "[1] normalized subject for #{id} is #{Message.normalize_subj(root.subj)}"
+ #Redwood::log "[1] for #{root}, subject #{Message.normalize_subj(root.subj)} has #{@subj_thread[Message.normalize_subj(root.subj)] ? 'a' : 'no'} thread"
thread = (@subj_thread[Message.normalize_subj(root.subj)] ||= Thread.new)
#thread = (@subj_thread[root.id] ||= Thread.new)
thread << root
root.thread = thread
- # puts "# (1) added #{root} to #{thread}"
+ # Redwood::log "[1] added #{root} to #{thread}"
end
else
if oldroot.thread
## new root. need to drop old one and put this one in its place
- # puts "*** DROPPING #{oldroot} from #{oldroot.thread}"
oldroot.thread.drop oldroot
oldroot.thread = nil
end
if root.thread
- # puts "*** IGNORING cuz root already has a thread"
+ ## check to see if the subject is still the same (in the case
+ ## that we first added a child message with a different
+ ## subject)
+ s = Message.normalize_subj(root.subj)
+ unless @subj_thread[s] == root.thread
+ # Redwood::log "[2] moving thread to new subject #{root.subj}"
+ if @subj_thread[s]
+ @subj_thread[s] << root
+ root.thread = @subj_thread[s]
+ else
+ @subj_thread[s] = root.thread
+ end
+ end
+
else
## to disable subject grouping, use the next line instead
## (and the same above)
- #Redwood::log "[2] normalized subject for #{id} is #{Message.normalize_subj(root.subj)}"
+
+ ## this code is duplicated above. sorry! TODO: refactor
+ # Redwood::log "[2] for #{root}, subject '#{Message.normalize_subj(root.subj)}' has #{@subj_thread[Message.normalize_subj(root.subj)] ? 'a' : 'no'} thread"
+
thread = (@subj_thread[Message.normalize_subj(root.subj)] ||= Thread.new)
#thread = (@subj_thread[root.id] ||= Thread.new)
thread << root
root.thread = thread
- # puts "# (2) added #{root} to #{thread}"
+ # Redwood::log "[2] added #{root} to #{thread}"
end
end
## last bit
@num_messages += 1