lib/sup/index.rb in sup-0.11 vs lib/sup/index.rb in sup-0.12
- old
+ new
@@ -1,10 +1,11 @@
ENV["XAPIAN_FLUSH_THRESHOLD"] = "1000"
require 'xapian'
require 'set'
require 'fileutils'
+require 'monitor'
begin
require 'chronic'
$have_chronic = true
rescue LoadError => e
@@ -19,11 +20,11 @@
# for searching due to precomputing thread membership.
class Index
include InteractiveLock
STEM_LANGUAGE = "english"
- INDEX_VERSION = '2'
+ INDEX_VERSION = '4'
## dates are converted to integers for xapian, and are used for document ids,
## so we must ensure they're reasonably valid. this typically only affect
## spam.
MIN_DATE = Time.at 0
@@ -46,10 +47,11 @@
include Singleton
def initialize dir=BASE_DIR
@dir = dir
+ FileUtils.mkdir_p @dir
@lock = Lockfile.new lockfile, :retries => 0, :max_age => nil
@sync_worker = nil
@sync_queue = Queue.new
@index_mutex = Monitor.new
end
@@ -102,19 +104,20 @@
path = File.join(@dir, 'xapian')
if File.exists? path
@xapian = Xapian::WritableDatabase.new(path, Xapian::DB_OPEN)
db_version = @xapian.get_metadata 'version'
db_version = '0' if db_version.empty?
- if db_version == '1'
- info "Upgrading index format 1 to 2"
+ if false
+ info "Upgrading index format #{db_version} to #{INDEX_VERSION}"
@xapian.set_metadata 'version', INDEX_VERSION
elsif db_version != INDEX_VERSION
- fail "This Sup version expects a v#{INDEX_VERSION} index, but you have an existing v#{db_version} index. Please downgrade to your previous version and dump your labels before upgrading to this version (then run sup-sync --restore)."
+ fail "This Sup version expects a v#{INDEX_VERSION} index, but you have an existing v#{db_version} index. Please run sup-dump to save your labels, move #{path} out of the way, and run sup-sync --restore."
end
else
@xapian = Xapian::WritableDatabase.new(path, Xapian::DB_CREATE)
@xapian.set_metadata 'version', INDEX_VERSION
+ @xapian.set_metadata 'rescue-version', '0'
end
@enquire = Xapian::Enquire.new @xapian
@enquire.weighting_scheme = Xapian::BoolWeight.new
@enquire.docid_order = Xapian::Enquire::ASCENDING
end
@@ -191,15 +194,19 @@
## Load message with the given message-id from the index
def build_message id
entry = synchronize { get_entry id }
return unless entry
- source = SourceManager[entry[:source_id]]
- raise "invalid source #{entry[:source_id]}" unless source
+ locations = entry[:locations].map do |source_id,source_info|
+ source = SourceManager[source_id]
+ raise "invalid source #{source_id}" unless source
+ Location.new source, source_info
+ end
- m = Message.new :source => source, :source_info => entry[:source_info],
- :labels => entry[:labels], :snippet => entry[:snippet]
+ m = Message.new :locations => locations,
+ :labels => entry[:labels],
+ :snippet => entry[:snippet]
mk_person = lambda { |x| Person.new(*x.reverse!) }
entry[:from] = mk_person[entry[:from]]
entry[:to].map!(&mk_person)
entry[:cc].map!(&mk_person)
@@ -258,10 +265,30 @@
## was synced from
def source_for_id id
synchronize { get_entry(id)[:source_id] }
end
+ ## Yields each term in the index that starts with prefix
+ def each_prefixed_term prefix
+ term = @xapian._dangerous_allterms_begin prefix
+ lastTerm = @xapian._dangerous_allterms_end prefix
+ until term.equals lastTerm
+ yield term.term
+ term.next
+ end
+ nil
+ end
+
+ ## Yields (in lexicographical order) the source infos of all locations from
+ ## the given source with the given source_info prefix
+ def each_source_info source_id, prefix='', &b
+ prefix = mkterm :location, source_id, prefix
+ each_prefixed_term prefix do |x|
+ yield x[prefix.length..-1]
+ end
+ end
+
class ParseError < StandardError; end
## parse a query string from the user. returns a query object
## that can be passed to any index method with a 'query'
## argument.
@@ -286,26 +313,10 @@
else
"(#{email_field}:#{value} OR #{name_field}:#{value})"
end
end
- ## if we see a label:deleted or a label:spam term anywhere in the query
- ## string, we set the extra load_spam or load_deleted options to true.
- ## bizarre? well, because the query allows arbitrary parenthesized boolean
- ## expressions, without fully parsing the query, we can't tell whether
- ## the user is explicitly directing us to search spam messages or not.
- ## e.g. if the string is -(-(-(-(-label:spam)))), does the user want to
- ## search spam messages or not?
- ##
- ## so, we rely on the fact that turning these extra options ON turns OFF
- ## the adding of "-label:deleted" or "-label:spam" terms at the very
- ## final stage of query processing. if the user wants to search spam
- ## messages, not adding that is the right thing; if he doesn't want to
- ## search spam messages, then not adding it won't have any effect.
- query[:load_spam] = true if subs =~ /\blabel:spam\b/
- query[:load_deleted] = true if subs =~ /\blabel:deleted\b/
-
## gmail style "is" operator
subs = subs.gsub(/\b(is|has):(\S+)\b/) do
field, label = $1, $2
case label
when "read"
@@ -319,10 +330,33 @@
else
"label:#{$2}"
end
end
+ ## labels are stored lower-case in the index
+ subs = subs.gsub(/\blabel:(\S+)\b/) do
+ label = $1
+ "label:#{label.downcase}"
+ end
+
+ ## if we see a label:deleted or a label:spam term anywhere in the query
+ ## string, we set the extra load_spam or load_deleted options to true.
+ ## bizarre? well, because the query allows arbitrary parenthesized boolean
+ ## expressions, without fully parsing the query, we can't tell whether
+ ## the user is explicitly directing us to search spam messages or not.
+ ## e.g. if the string is -(-(-(-(-label:spam)))), does the user want to
+ ## search spam messages or not?
+ ##
+ ## so, we rely on the fact that turning these extra options ON turns OFF
+ ## the adding of "-label:deleted" or "-label:spam" terms at the very
+ ## final stage of query processing. if the user wants to search spam
+ ## messages, not adding that is the right thing; if he doesn't want to
+ ## search spam messages, then not adding it won't have any effect.
+ query[:load_spam] = true if subs =~ /\blabel:spam\b/
+ query[:load_deleted] = true if subs =~ /\blabel:deleted\b/
+ query[:load_killed] = true if subs =~ /\blabel:killed\b/
+
## gmail style attachments "filename" and "filetype" searches
subs = subs.gsub(/\b(filename|filetype):(\((.+?)\)\B|(\S+)\b)/) do
field, name = $1, ($3 || $4)
case field
when "filename"
@@ -451,10 +485,11 @@
'attachment_extension' => 'O',
'msgid' => 'Q',
'id' => 'Q',
'thread' => 'H',
'ref' => 'R',
+ 'location' => 'J',
}
PREFIX = NORMAL_PREFIX.merge BOOLEAN_PREFIX
MSGID_VALUENO = 0
@@ -512,11 +547,11 @@
doc.value MSGID_VALUENO
end
def get_entry id
return unless doc = find_doc(id)
- Marshal.load doc.data
+ doc.entry
end
def thread_killed? thread_id
not run_query(Q.new(Q::OP_AND, mkterm(:thread, thread_id), mkterm(:label, :Killed)), 0, 1).empty?
end
@@ -545,10 +580,11 @@
pos_terms << mkterm(:type, 'mail')
pos_terms.concat(labels.map { |l| mkterm(:label,l) })
pos_terms << opts[:qobj] if opts[:qobj]
pos_terms << mkterm(:source_id, opts[:source_id]) if opts[:source_id]
+ pos_terms << mkterm(:location, *opts[:location]) if opts[:location]
if opts[:participants]
participant_terms = opts[:participants].map { |p| [:from,:to].map { |d| mkterm(:email, d, (Redwood::Person === p) ? p.email : p) } }.flatten
pos_terms << Q.new(Q::OP_OR, participant_terms)
end
@@ -573,12 +609,11 @@
old_entry = !do_index_static && doc.entry
snippet = do_index_static ? m.snippet : old_entry[:snippet]
entry = {
:message_id => m.id,
- :source_id => m.source.id,
- :source_info => m.source_info,
+ :locations => m.locations.map { |x| [x.source.id, x.info] },
:date => truncate_date(m.date),
:snippet => snippet,
:labels => m.labels.to_a,
:from => [m.from.email, m.from.name],
:to => m.to.map { |p| [p.email, p.name] },
@@ -593,10 +628,11 @@
doc.clear_terms
doc.clear_values
index_message_static m, doc, entry
end
+ index_message_locations doc, entry, old_entry
index_message_threading doc, entry, old_entry
index_message_labels doc, entry[:labels], (do_index_static ? [] : old_entry[:labels])
doc.entry = entry
synchronize do
@@ -635,11 +671,10 @@
# Miscellaneous terms
doc.add_term mkterm(:date, m.date) if m.date
doc.add_term mkterm(:type, 'mail')
doc.add_term mkterm(:msgid, m.id)
- doc.add_term mkterm(:source_id, m.source.id)
m.attachments.each do |a|
a =~ /\.(\w+)$/ or next
doc.add_term mkterm(:attachment_extension, $1)
end
@@ -652,10 +687,17 @@
doc.add_value MSGID_VALUENO, m.id
doc.add_value DATE_VALUENO, date_value
end
+ def index_message_locations doc, entry, old_entry
+ old_entry[:locations].map { |x| x[0] }.uniq.each { |x| doc.remove_term mkterm(:source_id, x) } if old_entry
+ entry[:locations].map { |x| x[0] }.uniq.each { |x| doc.add_term mkterm(:source_id, x) }
+ old_entry[:locations].each { |x| (doc.remove_term mkterm(:location, *x) rescue nil) } if old_entry
+ entry[:locations].each { |x| doc.add_term mkterm(:location, *x) }
+ end
+
def index_message_labels doc, new_labels, old_labels
return if new_labels == old_labels
added = new_labels.to_a - old_labels.to_a
removed = old_labels.to_a - new_labels.to_a
added.each { |t| doc.add_term mkterm(:label,t) }
@@ -714,9 +756,11 @@
when :to then PREFIX['to_email']
else raise "Invalid email term type #{args[0]}"
end + args[1].to_s.downcase
when :source_id
PREFIX['source_id'] + args[0].to_s.downcase
+ when :location
+ PREFIX['location'] + [args[0]].pack('n') + args[1].to_s
when :attachment_extension
PREFIX['attachment_extension'] + args[0].to_s.downcase
when :msgid, :ref, :thread
PREFIX[type.to_s] + args[0][0...(MAX_TERM_LENGTH-1)]
else