require 'date' module Redwood class Thread include Enumerable attr_reader :containers def initialize raise "wrong thread, buddy!" if block_given? @containers = [] end def << c @containers << c end def empty?; @containers.empty?; end def drop c raise "bad drop" unless @containers.member? c @containers.delete c end def dump puts "=== start thread #{self} with #{@containers.length} trees ===" @containers.each { |c| c.dump_recursive } puts "=== end thread ===" end ## yields each message and some stuff def each fake_root=false adj = 0 root = @containers.find_all { |c| !Message.subj_is_reply?(c) }.argmin { |c| c.date } if root adj = 1 root.first_useful_descendant.each_with_stuff do |c, d, par| yield c.message, d, (par ? par.message : nil) end elsif @containers.length > 1 && fake_root adj = 1 yield :fake_root, 0, nil end @containers.each do |cont| next if cont == root fud = cont.first_useful_descendant fud.each_with_stuff do |c, d, par| ## special case here: if we're an empty root that's already ## been joined by a fake root, don't emit yield c.message, d + adj, (par ? par.message : nil) unless fake_root && c.message.nil? && root.nil? && c == fud end end end def dirty?; any? { |m, *o| m && m.dirty? }; end def date; map { |m, *o| m.date if m }.compact.max; end def snippet; argfind { |m, *o| m && m.snippet }; end def authors; map { |m, *o| m.from if m }.compact.uniq; end def apply_label t; each { |m, *o| m && m.add_label(t) }; end def remove_label t each { |m, *o| m && m.remove_label(t) } end def toggle_label label if has_label? label remove_label label return false else apply_label label return true end end def set_labels l; each { |m, *o| m && m.labels = l }; end def has_label? t; any? { |m, *o| m && m.has_label?(t) }; end def save index; each { |m, *o| m && m.save(index) }; end def direct_participants map { |m, *o| [m.from] + m.to if m }.flatten.compact.uniq end def participants map { |m, *o| [m.from] + m.to + m.cc + m.bcc if m }.flatten.compact.uniq end def size; map { |m, *o| m ? 1 : 0 }.sum; end def subj; argfind { |m, *o| m && m.subj }; end def labels map { |m, *o| m && m.labels }.flatten.compact.uniq.sort_by { |t| t.to_s } end def labels= l each { |m, *o| m && m.labels = l.clone } end def latest_message inject(nil) do |a, b| b = b.first if a.nil? b elsif b.nil? a else b.date > a.date ? b : a end end end def to_s "<thread containing: #{@containers.join ', '}>" end end ## recursive structure used internally to represent message trees as ## described by reply-to: and references: headers. ## ## the 'id' field is the same as the message id. but the message might ## be empty, in the case that we represent a message that was referenced ## by another message (as an ancestor) but never received. class Container attr_accessor :message, :parent, :children, :id, :thread def initialize id raise "non-String #{id.inspect}" unless id.is_a? String @id = id @message, @parent, @thread = nil, nil, nil @children = [] end def each_with_stuff parent=nil yield self, 0, parent @children.each do |c| c.each_with_stuff(self) { |cc, d, par| yield cc, d + 1, par } end end def descendant_of? o if o == self true else @parent && @parent.descendant_of?(o) end end def == o; Container === o && id == o.id; end def empty?; @message.nil?; end def root?; @parent.nil?; end def root; root? ? self : @parent.root; end def first_useful_descendant if empty? && @children.size == 1 @children.first.first_useful_descendant else self end end def find_attr attr if empty? @children.argfind { |c| c.find_attr attr } else @message.send attr end end def subj; find_attr :subj; end def date; find_attr :date; end def is_reply?; subj && Message.subject_is_reply?(subj); end def to_s [ "<#{id}", (@parent.nil? ? nil : "parent=#{@parent.id}"), (@children.empty? ? nil : "children=#{@children.map { |c| c.id }.inspect}"), ].compact.join(" ") + ">" end def dump_recursive indent=0, root=true, parent=nil raise "inconsistency" unless parent.nil? || parent.children.include?(self) unless root print " " * indent print "+->" end line = #"[#{useful? ? 'U' : ' '}] " + if @message "[#{thread}] #{@message.subj} " ##{@message.refs.inspect} / #{@message.replytos.inspect}" else "<no message>" end puts "#{id} #{line}"#[0 .. (105 - indent)] indent += 3 @children.each { |c| c.dump_recursive indent, false, self } end end ## a set of threads (so a forest). builds the thread structures by ## reading messages from an index. class ThreadSet attr_reader :num_messages def initialize index @index = index @num_messages = 0 @messages = {} ## map from message ids to container objects @subj_thread = {} ## map from subject strings to thread objects end def contains_id? id; @messages.member?(id) && !@messages[id].empty?; end def thread_for m (c = @messages[m.id]) && c.root.thread end def delete_empties @subj_thread.each { |k, v| @subj_thread.delete(k) if v.empty? } end private :delete_empties def threads; delete_empties; @subj_thread.values; end def size; delete_empties; @subj_thread.size; end def dump @subj_thread.each do |s, t| puts "**********************" puts "** for subject #{s} **" puts "**********************" t.dump end end def link p, c, overwrite=false if p == c || p.descendant_of?(c) || c.descendant_of?(p) # would create a loop # puts "*** linking parent #{p} and child #{c} would create a loop" return end if c.parent.nil? || overwrite c.parent.children.delete c if overwrite && c.parent if c.thread c.thread.drop c c.thread = nil end p.children << c c.parent = p end end private :link def remove mid return unless(c = @messages[mid]) c.parent.children.delete c if c.parent if c.thread c.thread.drop c c.thread = nil end end ## load in (at most) num number of threads from the index def load_n_threads num, opts={} @index.each_id_by_date opts do |mid, builder| break if size >= num next if contains_id? mid m = builder.call add_message m load_thread_for_message m yield @subj_thread.size if block_given? end end ## loads in all messages needed to thread m def load_thread_for_message m @index.each_message_in_thread_for m, :limit => 100 do |mid, builder| next if contains_id? mid add_message builder.call end end def is_relevant? m m.refs.any? { |ref_id| @messages[ref_id] } end ## an "online" version of the jwz threading algorithm. def add_message message id = message.id el = (@messages[id] ||= Container.new id) return if @messages[id].message # we've seen it before el.message = message oldroot = el.root ## link via references: prev = nil message.refs.each do |ref_id| raise "non-String ref id #{ref_id.inspect} (full: #{message.refs.inspect})" unless ref_id.is_a?(String) ref = (@messages[ref_id] ||= Container.new ref_id) link prev, ref if prev prev = ref end link prev, el, true if prev ## link via in-reply-to: message.replytos.each do |ref_id| ref = (@messages[ref_id] ||= Container.new ref_id) link ref, el, true break # only do the first one end ## update subject grouping root = el.root # puts "> have #{el}, root #{root}, oldroot #{oldroot}" # el.dump_recursive if root == oldroot if oldroot.thread # puts "*** root (#{root.subj}) == oldroot (#{oldroot.subj}); ignoring" else ## to disable subject grouping, use the next line instead ## (and the same for below) #Redwood::log "[1] normalized subject for #{id} is #{Message.normalize_subj(root.subj)}" thread = (@subj_thread[Message.normalize_subj(root.subj)] ||= Thread.new) #thread = (@subj_thread[root.id] ||= Thread.new) thread << root root.thread = thread # puts "# (1) added #{root} to #{thread}" end else if oldroot.thread ## new root. need to drop old one and put this one in its place # puts "*** DROPPING #{oldroot} from #{oldroot.thread}" oldroot.thread.drop oldroot oldroot.thread = nil end if root.thread # puts "*** IGNORING cuz root already has a thread" else ## to disable subject grouping, use the next line instead ## (and the same above) #Redwood::log "[2] normalized subject for #{id} is #{Message.normalize_subj(root.subj)}" thread = (@subj_thread[Message.normalize_subj(root.subj)] ||= Thread.new) #thread = (@subj_thread[root.id] ||= Thread.new) thread << root root.thread = thread # puts "# (2) added #{root} to #{thread}" end end ## last bit @num_messages += 1 end end end