lib/sup/index.rb in sup-0.12.1 vs lib/sup/index.rb in sup-0.13.0

- old
+ new

@@ -11,19 +11,22 @@ rescue LoadError => e debug "No 'chronic' gem detected. Install it for date/time query restrictions." $have_chronic = false end +if ([Xapian.major_version, Xapian.minor_version, Xapian.revision] <=> [1,2,1]) < 0 + fail "Xapian version 1.2.1 or higher required" +end + module Redwood # This index implementation uses Xapian for searching and storage. It # tends to be slightly faster than Ferret for indexing and significantly faster # for searching due to precomputing thread membership. class Index include InteractiveLock - STEM_LANGUAGE = "english" INDEX_VERSION = '4' ## dates are converted to integers for xapian, and are used for document ids, ## so we must ensure they're reasonably valid. this typically only affect ## spam. @@ -204,11 +207,13 @@ m = Message.new :locations => locations, :labels => entry[:labels], :snippet => entry[:snippet] - mk_person = lambda { |x| Person.new(*x.reverse!) } + # Try to find person from contacts before falling back to + # generating it from the address. + mk_person = lambda { |x| Person.from_name_and_email(*x.reverse!) } entry[:from] = mk_person[entry[:from]] entry[:to].map!(&mk_person) entry[:cc].map!(&mk_person) entry[:bcc].map!(&mk_person) @@ -229,11 +234,11 @@ each_id_by_date :participants => email_addresses do |id,b| break if contacts.size >= num m = b.call ([m.from]+m.to+m.cc+m.bcc).compact.each { |p| contacts << [p.name, p.email] } end - contacts.to_a.compact.map { |n,e| Person.new n, e }[0...num] + contacts.to_a.compact[0...num].map { |n,e| Person.from_name_and_email n, e } end ## Yield each message-id matching query EACH_ID_PAGE = 100 def each_id query={} @@ -420,16 +425,16 @@ debug "translated query: #{subs.inspect}" qp = Xapian::QueryParser.new qp.database = @xapian - qp.stemmer = Xapian::Stem.new(STEM_LANGUAGE) + qp.stemmer = Xapian::Stem.new($config[:stem_language]) qp.stemming_strategy = Xapian::QueryParser::STEM_SOME qp.default_op = Xapian::Query::OP_AND qp.add_valuerangeprocessor(Xapian::NumberValueRangeProcessor.new(DATE_VALUENO, 'date:', true)) - NORMAL_PREFIX.each { |k,vs| vs.each { |v| qp.add_prefix k, v } } - BOOLEAN_PREFIX.each { |k,vs| vs.each { |v| qp.add_boolean_prefix k, v } } + NORMAL_PREFIX.each { |k,info| info[:prefix].each { |v| qp.add_prefix k, v } } + BOOLEAN_PREFIX.each { |k,info| info[:prefix].each { |v| qp.add_boolean_prefix k, v, info[:exclusive] } } begin xapian_query = qp.parse_query(subs, Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_WILDCARD) rescue RuntimeError => e raise ParseError, "xapian query parser error: #{e}" @@ -476,35 +481,35 @@ private # Stemmed NORMAL_PREFIX = { - 'subject' => 'S', - 'body' => 'B', - 'from_name' => 'FN', - 'to_name' => 'TN', - 'name' => %w(FN TN), - 'attachment' => 'A', - 'email_text' => 'E', - '' => %w(S B FN TN A E), + 'subject' => {:prefix => 'S', :exclusive => false}, + 'body' => {:prefix => 'B', :exclusive => false}, + 'from_name' => {:prefix => 'FN', :exclusive => false}, + 'to_name' => {:prefix => 'TN', :exclusive => false}, + 'name' => {:prefix => %w(FN TN), :exclusive => false}, + 'attachment' => {:prefix => 'A', :exclusive => false}, + 'email_text' => {:prefix => 'E', :exclusive => false}, + '' => {:prefix => %w(S B FN TN A E), :exclusive => false}, } # Unstemmed BOOLEAN_PREFIX = { - 'type' => 'K', - 'from_email' => 'FE', - 'to_email' => 'TE', - 'email' => %w(FE TE), - 'date' => 'D', - 'label' => 'L', - 'source_id' => 'I', - 'attachment_extension' => 'O', - 'msgid' => 'Q', - 'id' => 'Q', - 'thread' => 'H', - 'ref' => 'R', - 'location' => 'J', + 'type' => {:prefix => 'K', :exclusive => true}, + 'from_email' => {:prefix => 'FE', :exclusive => false}, + 'to_email' => {:prefix => 'TE', :exclusive => false}, + 'email' => {:prefix => %w(FE TE), :exclusive => false}, + 'date' => {:prefix => 'D', :exclusive => true}, + 'label' => {:prefix => 'L', :exclusive => false}, + 'source_id' => {:prefix => 'I', :exclusive => true}, + 'attachment_extension' => {:prefix => 'O', :exclusive => false}, + 'msgid' => {:prefix => 'Q', :exclusive => true}, + 'id' => {:prefix => 'Q', :exclusive => true}, + 'thread' => {:prefix => 'H', :exclusive => false}, + 'ref' => {:prefix => 'R', :exclusive => false}, + 'location' => {:prefix => 'J', :exclusive => false}, } PREFIX = NORMAL_PREFIX.merge BOOLEAN_PREFIX MSGID_VALUENO = 0 @@ -666,25 +671,25 @@ ## Index content that can't be changed by the user def index_message_static m, doc, entry # Person names are indexed with several prefixes person_termer = lambda do |d| lambda do |p| - doc.index_text p.name, PREFIX["#{d}_name"] if p.name - doc.index_text p.email, PREFIX['email_text'] + doc.index_text p.name, PREFIX["#{d}_name"][:prefix] if p.name + doc.index_text p.email, PREFIX['email_text'][:prefix] doc.add_term mkterm(:email, d, p.email) end end person_termer[:from][m.from] if m.from (m.to+m.cc+m.bcc).each(&(person_termer[:to])) # Full text search content subject_text = m.indexable_subject body_text = m.indexable_body - doc.index_text subject_text, PREFIX['subject'] - doc.index_text body_text, PREFIX['body'] - m.attachments.each { |a| doc.index_text a, PREFIX['attachment'] } + doc.index_text subject_text, PREFIX['subject'][:prefix] + doc.index_text body_text, PREFIX['body'][:prefix] + m.attachments.each { |a| doc.index_text a, PREFIX['attachment'][:prefix] } # Miscellaneous terms doc.add_term mkterm(:date, m.date) if m.date doc.add_term mkterm(:type, 'mail') doc.add_term mkterm(:msgid, m.id) @@ -758,29 +763,29 @@ # Construct a Xapian term def mkterm type, *args case type when :label - PREFIX['label'] + args[0].to_s.downcase + PREFIX['label'][:prefix] + args[0].to_s.downcase when :type - PREFIX['type'] + args[0].to_s.downcase + PREFIX['type'][:prefix] + args[0].to_s.downcase when :date - PREFIX['date'] + args[0].getutc.strftime("%Y%m%d%H%M%S") + PREFIX['date'][:prefix] + args[0].getutc.strftime("%Y%m%d%H%M%S") when :email case args[0] - when :from then PREFIX['from_email'] - when :to then PREFIX['to_email'] + when :from then PREFIX['from_email'][:prefix] + when :to then PREFIX['to_email'][:prefix] else raise "Invalid email term type #{args[0]}" end + args[1].to_s.downcase when :source_id - PREFIX['source_id'] + args[0].to_s.downcase + PREFIX['source_id'][:prefix] + args[0].to_s.downcase when :location - PREFIX['location'] + [args[0]].pack('n') + args[1].to_s + PREFIX['location'][:prefix] + [args[0]].pack('n') + args[1].to_s when :attachment_extension - PREFIX['attachment_extension'] + args[0].to_s.downcase + PREFIX['attachment_extension'][:prefix] + args[0].to_s.downcase when :msgid, :ref, :thread - PREFIX[type.to_s] + args[0][0...(MAX_TERM_LENGTH-1)] + PREFIX[type.to_s][:prefix] + args[0][0...(MAX_TERM_LENGTH-1)] else raise "Invalid term type #{type}" end end end @@ -796,10 +801,10 @@ self.data = Marshal.dump x end def index_text text, prefix, weight=1 term_generator = Xapian::TermGenerator.new - term_generator.stemmer = Xapian::Stem.new(Redwood::Index::STEM_LANGUAGE) + term_generator.stemmer = Xapian::Stem.new($config[:stem_language]) term_generator.document = self term_generator.index_text text, weight, prefix end alias old_add_term add_term