lib/sup/index.rb in sup-0.3 vs lib/sup/index.rb in sup-0.4

- old
+ new

@@ -169,21 +169,28 @@ else m.source.id or raise "unregistered source #{m.source} (id #{m.source.id.inspect})" end to = (m.to + m.cc + m.bcc).map { |x| x.email }.join(" ") + snippet = + if m.snippet_contains_encrypted_content? && $config[:discard_snippets_from_encrypted_messages] + "" + else + m.snippet + end + d = { :message_id => m.id, :source_id => source_id, :source_info => m.source_info, :date => m.date.to_indexable_s, :body => m.content, - :snippet => m.snippet, + :snippet => snippet, :label => m.labels.uniq.join(" "), :from => m.from ? m.from.email : "", :to => (m.to + m.cc + m.bcc).map { |x| x.email }.join(" "), - :subject => wrap_subj(Message.normalize_subj(m.subj)), + :subject => wrap_subj(m.subj), :refs => (m.refs + m.replytos).uniq.join(" "), } @index.delete docid if docid @index.add_document d @@ -234,10 +241,11 @@ ## ## only two options, :limit and :skip_killed. if :skip_killed is ## true, stops loading any thread if a message with a :killed flag ## is found. SAME_SUBJECT_DATE_LIMIT = 7 + MAX_CLAUSES = 1000 def each_message_in_thread_for m, opts={} #Redwood::log "Building thread for #{m.id}: #{m.subj}" messages = {} searched = {} num_queries = 0 @@ -262,17 +270,20 @@ pending = [m.id] end until pending.empty? || (opts[:limit] && messages.size >= opts[:limit]) q = Ferret::Search::BooleanQuery.new true + # this disappeared in newer ferrets... wtf. + # q.max_clause_count = 2048 - pending.each do |id| + lim = [MAX_CLAUSES / 2, pending.length].min + pending[0 ... lim].each do |id| searched[id] = true q.add_query Ferret::Search::TermQuery.new(:message_id, id), :should q.add_query Ferret::Search::TermQuery.new(:refs, id), :should end - pending = [] + pending = pending[lim .. -1] q = build_query :qobj => q num_queries += 1 killed = false @@ -311,11 +322,11 @@ fake_header = { "date" => Time.at(doc[:date].to_i), "subject" => unwrap_subj(doc[:subject]), "from" => doc[:from], - "to" => doc[:to], + "to" => doc[:to].split(/\s+/).join(", "), # reformat "message-id" => doc[:message_id], "references" => doc[:refs].split(/\s+/).map { |x| "<#{x}>" }.join(" "), } Message.new :source => source, :source_info => doc[:source_info].to_i, @@ -380,23 +391,71 @@ protected ## do any specialized parsing ## returns nil and flashes error message if parsing failed - def parse_user_query_string str - result = str.gsub(/\b(to|from):(\S+)\b/) do + def parse_user_query_string s + extraopts = {} + + ## this is a little hacky, but it works, at least until ferret changes + ## its api. we parse the user query string with ferret twice: the first + ## time we just turn the resulting object back into a string, which has + ## the next effect of transforming the original string into a nice + ## normalized form with + and - instead of AND, OR, etc. then we do some + ## string substitutions which depend on this normalized form, re-parse + ## the string with Ferret, and return the resulting query object. + + norms = @qparser.parse(s).to_s + Redwood::log "normalized #{s.inspect} to #{norms.inspect}" unless s == norms + + subs = norms.gsub(/\b(to|from):(\S+)\b/) do field, name = $1, $2 if(p = ContactManager.contact_for(name)) [field, p.email] + elsif name == "me" + [field, "(" + AccountManager.user_emails.join("||") + ")"] else [field, name] end.join(":") end - + + ## if we see a label:deleted or a label:spam term anywhere in the query + ## string, we set the extra load_spam or load_deleted options to true. + ## bizarre? well, because the query allows arbitrary parenthesized boolean + ## expressions, without fully parsing the query, we can't tell whether + ## the user is explicitly directing us to search spam messages or not. + ## e.g. if the string is -(-(-(-(-label:spam)))), does the user want to + ## search spam messages or not? + ## + ## so, we rely on the fact that turning these extra options ON turns OFF + ## the adding of "-label:deleted" or "-label:spam" terms at the very + ## final stage of query processing. if the user wants to search spam + ## messages, not adding that is the right thing; if he doesn't want to + ## search spam messages, then not adding it won't have any effect. + extraopts[:load_spam] = true if subs =~ /\blabel:spam\b/ + extraopts[:load_deleted] = true if subs =~ /\blabel:deleted\b/ + + ## gmail style "is" operator + subs = subs.gsub(/\b(is):(\S+)\b/) do + field, label = $1, $2 + case label + when "read" + "-label:unread" + when "spam" + extraopts[:load_spam] = true + "label:spam" + when "deleted" + extraopts[:load_deleted] = true + "label:deleted" + else + "label:#{$2}" + end + end + if $have_chronic chronic_failure = false - result = result.gsub(/\b(before|on|in|after):(\((.+?)\)\B|(\S+)\b)/) do + subs = subs.gsub(/\b(before|on|in|during|after):(\((.+?)\)\B|(\S+)\b)/) do break if chronic_failure field, datestr = $1, ($3 || $4) realdate = Chronic.parse(datestr, :guess => false, :context => :none) if realdate case field @@ -409,18 +468,22 @@ else Redwood::log "chronic: translated #{field}:#{datestr} to #{realdate}" "date:(<= #{sprintf "%012d", realdate.end.to_i}) date:(>= #{sprintf "%012d", realdate.begin.to_i})" end else - BufferManager.flash "Don't understand date #{datestr.inspect}!" + BufferManager.flash "Can't understand date #{datestr.inspect}!" chronic_failure = true end end - result = nil if chronic_failure + subs = nil if chronic_failure end - Redwood::log "translated #{str.inspect} to #{result}" unless result == str - @qparser.parse result if result + Redwood::log "translated #{norms.inspect} to #{subs.inspect}" unless subs == norms + if subs + [@qparser.parse(subs), extraopts] + else + nil + end end def build_query opts query = Ferret::Search::BooleanQuery.new query.add_query opts[:qobj], :must if opts[:qobj]