lib/sup/index.rb in sup-0.12.1 vs lib/sup/index.rb in sup-0.13.0
- old
+ new
@@ -11,19 +11,22 @@
rescue LoadError => e
debug "No 'chronic' gem detected. Install it for date/time query restrictions."
$have_chronic = false
end
# Load-time guard added in 0.13.0: abort when the installed Xapian is older
# than 1.2.1. Array#<=> compares the [major, minor, revision] triple element
# by element, so a negative result means the installed version sorts before
# [1,2,1]. `fail` raises a RuntimeError with the message below.
# NOTE(review): presumably needed for the three-argument add_boolean_prefix
# call this release introduces in the query parser -- confirm.
+if ([Xapian.major_version, Xapian.minor_version, Xapian.revision] <=> [1,2,1]) < 0
+ fail "Xapian version 1.2.1 or higher required"
+end
+
module Redwood
# This index implementation uses Xapian for searching and storage. It
# tends to be slightly faster than Ferret for indexing and significantly faster
# for searching due to precomputing thread membership.
class Index
include InteractiveLock
- STEM_LANGUAGE = "english"
INDEX_VERSION = '4'
## Dates are converted to integers for Xapian, and are used for document ids,
## so we must ensure they're reasonably valid. This typically only affects
## spam.
@@ -204,11 +207,13 @@
m = Message.new :locations => locations,
:labels => entry[:labels],
:snippet => entry[:snippet]
- mk_person = lambda { |x| Person.new(*x.reverse!) }
+ # Try to find person from contacts before falling back to
+ # generating it from the address.
+ mk_person = lambda { |x| Person.from_name_and_email(*x.reverse!) }
entry[:from] = mk_person[entry[:from]]
entry[:to].map!(&mk_person)
entry[:cc].map!(&mk_person)
entry[:bcc].map!(&mk_person)
@@ -229,11 +234,11 @@
each_id_by_date :participants => email_addresses do |id,b|
break if contacts.size >= num
m = b.call
([m.from]+m.to+m.cc+m.bcc).compact.each { |p| contacts << [p.name, p.email] }
end
- contacts.to_a.compact.map { |n,e| Person.new n, e }[0...num]
+ contacts.to_a.compact[0...num].map { |n,e| Person.from_name_and_email n, e }
end
## Yield each message-id matching query
EACH_ID_PAGE = 100
def each_id query={}
@@ -420,16 +425,16 @@
debug "translated query: #{subs.inspect}"
qp = Xapian::QueryParser.new
qp.database = @xapian
- qp.stemmer = Xapian::Stem.new(STEM_LANGUAGE)
+ qp.stemmer = Xapian::Stem.new($config[:stem_language])
qp.stemming_strategy = Xapian::QueryParser::STEM_SOME
qp.default_op = Xapian::Query::OP_AND
qp.add_valuerangeprocessor(Xapian::NumberValueRangeProcessor.new(DATE_VALUENO, 'date:', true))
- NORMAL_PREFIX.each { |k,vs| vs.each { |v| qp.add_prefix k, v } }
- BOOLEAN_PREFIX.each { |k,vs| vs.each { |v| qp.add_boolean_prefix k, v } }
+ NORMAL_PREFIX.each { |k,info| info[:prefix].each { |v| qp.add_prefix k, v } }
+ BOOLEAN_PREFIX.each { |k,info| info[:prefix].each { |v| qp.add_boolean_prefix k, v, info[:exclusive] } }
begin
xapian_query = qp.parse_query(subs, Xapian::QueryParser::FLAG_PHRASE|Xapian::QueryParser::FLAG_BOOLEAN|Xapian::QueryParser::FLAG_LOVEHATE|Xapian::QueryParser::FLAG_WILDCARD)
rescue RuntimeError => e
raise ParseError, "xapian query parser error: #{e}"
@@ -476,35 +481,35 @@
private
# Stemmed
# Maps a query field name to the Xapian term prefix(es) for free-text fields.
# The query parser registers every entry with QueryParser#add_prefix, and the
# single-prefix entries are also looked up through PREFIX when message text is
# indexed. The '' key lists the prefixes registered for the empty field name.
#   '-' lines (0.12.1): the value is the prefix string, or an array of them.
#   '+' lines (0.13.0): the value is a hash holding the same prefix under
#                       :prefix plus an :exclusive flag.
# :exclusive is false for every entry here, and add_prefix is not passed it.
# NOTE(review): the query parser calls .each on :prefix, which is a bare
# String for most entries; that relies on String#each being available
# (built in on Ruby 1.8, otherwise defined elsewhere) -- confirm.
NORMAL_PREFIX = {
- 'subject' => 'S',
- 'body' => 'B',
- 'from_name' => 'FN',
- 'to_name' => 'TN',
- 'name' => %w(FN TN),
- 'attachment' => 'A',
- 'email_text' => 'E',
- '' => %w(S B FN TN A E),
+ 'subject' => {:prefix => 'S', :exclusive => false},
+ 'body' => {:prefix => 'B', :exclusive => false},
+ 'from_name' => {:prefix => 'FN', :exclusive => false},
+ 'to_name' => {:prefix => 'TN', :exclusive => false},
+ 'name' => {:prefix => %w(FN TN), :exclusive => false},
+ 'attachment' => {:prefix => 'A', :exclusive => false},
+ 'email_text' => {:prefix => 'E', :exclusive => false},
+ '' => {:prefix => %w(S B FN TN A E), :exclusive => false},
}
# Unstemmed
# Maps a query field name to the Xapian term prefix(es) for exact-match
# terms. The query parser registers every entry with
# QueryParser#add_boolean_prefix, and mkterm reads the same table (through
# PREFIX) to build the terms stored on each document.
#   '-' lines (0.12.1): the value is the prefix string, or an array of them.
#   '+' lines (0.13.0): the value is a hash holding the same prefix under
#                       :prefix plus an :exclusive flag, which is passed as
#                       the third argument of add_boolean_prefix.
# 'msgid' and 'id' share prefix 'Q'; 'email' covers both 'FE' and 'TE'.
# NOTE(review): per the Xapian QueryParser docs, exclusive=true is meant for
# fields with at most one value per document, so repeated filters on that
# field are ORed instead of ANDed -- confirm against the installed version.
# NOTE(review): the query parser calls .each on :prefix, which is a bare
# String for most entries; that relies on String#each being available
# (built in on Ruby 1.8, otherwise defined elsewhere) -- confirm.
BOOLEAN_PREFIX = {
- 'type' => 'K',
- 'from_email' => 'FE',
- 'to_email' => 'TE',
- 'email' => %w(FE TE),
- 'date' => 'D',
- 'label' => 'L',
- 'source_id' => 'I',
- 'attachment_extension' => 'O',
- 'msgid' => 'Q',
- 'id' => 'Q',
- 'thread' => 'H',
- 'ref' => 'R',
- 'location' => 'J',
+ 'type' => {:prefix => 'K', :exclusive => true},
+ 'from_email' => {:prefix => 'FE', :exclusive => false},
+ 'to_email' => {:prefix => 'TE', :exclusive => false},
+ 'email' => {:prefix => %w(FE TE), :exclusive => false},
+ 'date' => {:prefix => 'D', :exclusive => true},
+ 'label' => {:prefix => 'L', :exclusive => false},
+ 'source_id' => {:prefix => 'I', :exclusive => true},
+ 'attachment_extension' => {:prefix => 'O', :exclusive => false},
+ 'msgid' => {:prefix => 'Q', :exclusive => true},
+ 'id' => {:prefix => 'Q', :exclusive => true},
+ 'thread' => {:prefix => 'H', :exclusive => false},
+ 'ref' => {:prefix => 'R', :exclusive => false},
+ 'location' => {:prefix => 'J', :exclusive => false},
}
# Combined lookup table used by the indexing code and mkterm. The two hashes
# above have no key in common, so the merge does not overwrite anything.
PREFIX = NORMAL_PREFIX.merge BOOLEAN_PREFIX
MSGID_VALUENO = 0
@@ -666,25 +671,25 @@
## Index content that can't be changed by the user
def index_message_static m, doc, entry
# Person names are indexed with several prefixes
person_termer = lambda do |d|
lambda do |p|
- doc.index_text p.name, PREFIX["#{d}_name"] if p.name
- doc.index_text p.email, PREFIX['email_text']
+ doc.index_text p.name, PREFIX["#{d}_name"][:prefix] if p.name
+ doc.index_text p.email, PREFIX['email_text'][:prefix]
doc.add_term mkterm(:email, d, p.email)
end
end
person_termer[:from][m.from] if m.from
(m.to+m.cc+m.bcc).each(&(person_termer[:to]))
# Full text search content
subject_text = m.indexable_subject
body_text = m.indexable_body
- doc.index_text subject_text, PREFIX['subject']
- doc.index_text body_text, PREFIX['body']
- m.attachments.each { |a| doc.index_text a, PREFIX['attachment'] }
+ doc.index_text subject_text, PREFIX['subject'][:prefix]
+ doc.index_text body_text, PREFIX['body'][:prefix]
+ m.attachments.each { |a| doc.index_text a, PREFIX['attachment'][:prefix] }
# Miscellaneous terms
doc.add_term mkterm(:date, m.date) if m.date
doc.add_term mkterm(:type, 'mail')
doc.add_term mkterm(:msgid, m.id)
@@ -758,29 +763,29 @@
# Construct a Xapian term
#
# Returns the term string for +type+: the field's prefix from PREFIX followed
# by a value derived from +args+. In every branch the '-' line is the 0.12.1
# form (PREFIX[...] is the prefix string itself) and the '+' line is the
# 0.13.0 form (PREFIX[...] is a hash and the string lives under :prefix); the
# value appended after the prefix is unchanged between the two versions.
#
# Arguments by type:
#   :label, :type, :source_id, :attachment_extension
#       args[0] -- value; converted with to_s and downcased
#   :date
#       args[0] -- responds to getutc (e.g. a Time); written as
#                  YYYYmmddHHMMSS in UTC
#   :email
#       args[0] -- :from or :to, selects the from_email / to_email prefix
#       args[1] -- address; converted with to_s and downcased
#   :location
#       args[0] -- integer, packed as a 16-bit big-endian value
#       args[1] -- appended after to_s
#   :msgid, :ref, :thread
#       args[0] -- string, cut to its first MAX_TERM_LENGTH-1 characters
#
# Raises RuntimeError for an unknown +type+, or for an :email term whose
# args[0] is neither :from nor :to.
def mkterm type, *args
case type
when :label
- PREFIX['label'] + args[0].to_s.downcase
+ PREFIX['label'][:prefix] + args[0].to_s.downcase
when :type
- PREFIX['type'] + args[0].to_s.downcase
+ PREFIX['type'][:prefix] + args[0].to_s.downcase
when :date
- PREFIX['date'] + args[0].getutc.strftime("%Y%m%d%H%M%S")
+ PREFIX['date'][:prefix] + args[0].getutc.strftime("%Y%m%d%H%M%S")
when :email
# The inner case yields only the prefix; the address is appended after its
# closing `end` below.
case args[0]
- when :from then PREFIX['from_email']
- when :to then PREFIX['to_email']
+ when :from then PREFIX['from_email'][:prefix]
+ when :to then PREFIX['to_email'][:prefix]
else raise "Invalid email term type #{args[0]}"
end + args[1].to_s.downcase
when :source_id
- PREFIX['source_id'] + args[0].to_s.downcase
+ PREFIX['source_id'][:prefix] + args[0].to_s.downcase
when :location
# pack('n') emits args[0] as two raw bytes (unsigned 16-bit, network order).
- PREFIX['location'] + [args[0]].pack('n') + args[1].to_s
+ PREFIX['location'][:prefix] + [args[0]].pack('n') + args[1].to_s
when :attachment_extension
- PREFIX['attachment_extension'] + args[0].to_s.downcase
+ PREFIX['attachment_extension'][:prefix] + args[0].to_s.downcase
when :msgid, :ref, :thread
# The type symbol doubles as the PREFIX key; the value is not downcased.
- PREFIX[type.to_s] + args[0][0...(MAX_TERM_LENGTH-1)]
+ PREFIX[type.to_s][:prefix] + args[0][0...(MAX_TERM_LENGTH-1)]
else
raise "Invalid term type #{type}"
end
end
end
@@ -796,10 +801,10 @@
self.data = Marshal.dump x
end
# Tokenise +text+ and add the resulting terms to this document.
#
# text   -- the string to index
# prefix -- term prefix passed through to the term generator
# weight -- passed through to the term generator; defaults to 1
#
# A new Xapian::TermGenerator and Xapian::Stem are created on every call.
# The stemmer language is the only change in this hunk: 0.12.1 ('-' line)
# reads the constant Redwood::Index::STEM_LANGUAGE, 0.13.0 ('+' line) reads
# $config[:stem_language].
# NOTE(review): presumably $config[:stem_language] is always set by the time
# this runs; a nil here would be handed straight to Xapian::Stem.new --
# confirm a default exists.
def index_text text, prefix, weight=1
term_generator = Xapian::TermGenerator.new
- term_generator.stemmer = Xapian::Stem.new(Redwood::Index::STEM_LANGUAGE)
+ term_generator.stemmer = Xapian::Stem.new($config[:stem_language])
term_generator.document = self
# Argument order differs from this method's own signature: the generator
# takes (text, weight, prefix).
term_generator.index_text text, weight, prefix
end
alias old_add_term add_term