lib/sup/index.rb in sup-0.3 vs lib/sup/index.rb in sup-0.4
- old
+ new
@@ -169,21 +169,28 @@
else
m.source.id or raise "unregistered source #{m.source} (id #{m.source.id.inspect})"
end
to = (m.to + m.cc + m.bcc).map { |x| x.email }.join(" ")
+ snippet =
+ if m.snippet_contains_encrypted_content? && $config[:discard_snippets_from_encrypted_messages]
+ ""
+ else
+ m.snippet
+ end
+
d = {
:message_id => m.id,
:source_id => source_id,
:source_info => m.source_info,
:date => m.date.to_indexable_s,
:body => m.content,
- :snippet => m.snippet,
+ :snippet => snippet,
:label => m.labels.uniq.join(" "),
:from => m.from ? m.from.email : "",
:to => (m.to + m.cc + m.bcc).map { |x| x.email }.join(" "),
- :subject => wrap_subj(Message.normalize_subj(m.subj)),
+ :subject => wrap_subj(m.subj),
:refs => (m.refs + m.replytos).uniq.join(" "),
}
@index.delete docid if docid
@index.add_document d
@@ -234,10 +241,11 @@
##
## only two options, :limit and :skip_killed. if :skip_killed is
## true, stops loading any thread if a message with a :killed flag
## is found.
SAME_SUBJECT_DATE_LIMIT = 7
+ MAX_CLAUSES = 1000
def each_message_in_thread_for m, opts={}
#Redwood::log "Building thread for #{m.id}: #{m.subj}"
messages = {}
searched = {}
num_queries = 0
@@ -262,17 +270,20 @@
pending = [m.id]
end
until pending.empty? || (opts[:limit] && messages.size >= opts[:limit])
q = Ferret::Search::BooleanQuery.new true
+ # this disappeared in newer ferrets... wtf.
+ # q.max_clause_count = 2048
- pending.each do |id|
+ lim = [MAX_CLAUSES / 2, pending.length].min
+ pending[0 ... lim].each do |id|
searched[id] = true
q.add_query Ferret::Search::TermQuery.new(:message_id, id), :should
q.add_query Ferret::Search::TermQuery.new(:refs, id), :should
end
- pending = []
+ pending = pending[lim .. -1]
q = build_query :qobj => q
num_queries += 1
killed = false
@@ -311,11 +322,11 @@
fake_header = {
"date" => Time.at(doc[:date].to_i),
"subject" => unwrap_subj(doc[:subject]),
"from" => doc[:from],
- "to" => doc[:to],
+ "to" => doc[:to].split(/\s+/).join(", "), # reformat
"message-id" => doc[:message_id],
"references" => doc[:refs].split(/\s+/).map { |x| "<#{x}>" }.join(" "),
}
Message.new :source => source, :source_info => doc[:source_info].to_i,
@@ -380,23 +391,71 @@
protected
## do any specialized parsing
## returns nil and flashes error message if parsing failed
- def parse_user_query_string str
- result = str.gsub(/\b(to|from):(\S+)\b/) do
+ def parse_user_query_string s
+ extraopts = {}
+
+ ## this is a little hacky, but it works, at least until ferret changes
+ ## its api. we parse the user query string with ferret twice: the first
+ ## time we just turn the resulting object back into a string, which has
+ ## the net effect of transforming the original string into a nice
+ ## normalized form with + and - instead of AND, OR, etc. then we do some
+ ## string substitutions which depend on this normalized form, re-parse
+ ## the string with Ferret, and return the resulting query object.
+
+ norms = @qparser.parse(s).to_s
+ Redwood::log "normalized #{s.inspect} to #{norms.inspect}" unless s == norms
+
+ subs = norms.gsub(/\b(to|from):(\S+)\b/) do
field, name = $1, $2
if(p = ContactManager.contact_for(name))
[field, p.email]
+ elsif name == "me"
+ [field, "(" + AccountManager.user_emails.join("||") + ")"]
else
[field, name]
end.join(":")
end
-
+
+ ## if we see a label:spam or a label:deleted term anywhere in the query
+ ## string, we set the matching load_spam or load_deleted option to true.
+ ## bizarre? well, because the query allows arbitrary parenthesized boolean
+ ## expressions, without fully parsing the query, we can't tell whether
+ ## the user is explicitly directing us to search spam messages or not.
+ ## e.g. if the string is -(-(-(-(-label:spam)))), does the user want to
+ ## search spam messages or not?
+ ##
+ ## so, we rely on the fact that turning these extra options ON turns OFF
+ ## the adding of "-label:deleted" or "-label:spam" terms at the very
+ ## final stage of query processing. if the user wants to search spam
+ ## messages, not adding that is the right thing; if he doesn't want to
+ ## search spam messages, then not adding it won't have any effect.
+ extraopts[:load_spam] = true if subs =~ /\blabel:spam\b/
+ extraopts[:load_deleted] = true if subs =~ /\blabel:deleted\b/
+
+ ## gmail style "is" operator
+ subs = subs.gsub(/\b(is):(\S+)\b/) do
+ field, label = $1, $2
+ case label
+ when "read"
+ "-label:unread"
+ when "spam"
+ extraopts[:load_spam] = true
+ "label:spam"
+ when "deleted"
+ extraopts[:load_deleted] = true
+ "label:deleted"
+ else
+ "label:#{$2}"
+ end
+ end
+
if $have_chronic
chronic_failure = false
- result = result.gsub(/\b(before|on|in|after):(\((.+?)\)\B|(\S+)\b)/) do
+ subs = subs.gsub(/\b(before|on|in|during|after):(\((.+?)\)\B|(\S+)\b)/) do
break if chronic_failure
field, datestr = $1, ($3 || $4)
realdate = Chronic.parse(datestr, :guess => false, :context => :none)
if realdate
case field
@@ -409,18 +468,22 @@
else
Redwood::log "chronic: translated #{field}:#{datestr} to #{realdate}"
"date:(<= #{sprintf "%012d", realdate.end.to_i}) date:(>= #{sprintf "%012d", realdate.begin.to_i})"
end
else
- BufferManager.flash "Don't understand date #{datestr.inspect}!"
+ BufferManager.flash "Can't understand date #{datestr.inspect}!"
chronic_failure = true
end
end
- result = nil if chronic_failure
+ subs = nil if chronic_failure
end
- Redwood::log "translated #{str.inspect} to #{result}" unless result == str
- @qparser.parse result if result
+ Redwood::log "translated #{norms.inspect} to #{subs.inspect}" unless subs == norms
+ if subs
+ [@qparser.parse(subs), extraopts]
+ else
+ nil
+ end
end
def build_query opts
query = Ferret::Search::BooleanQuery.new
query.add_query opts[:qobj], :must if opts[:qobj]