require 'rsolr'
module RailsConnector
# This class provides a basic implementation for accessing a Solr search server.
# It can be activated by making it the superclass of SearchRequest
# (instead of DefaultSearchRequest).
# It should be customized by subclassing.
class LuceneSearchRequest
  # Words that would otherwise be read as boolean operators by the query
  # parser. They are dropped from the user's input before the remaining
  # words are joined with AND (see #solr_query_for_query_string).
  BOOLEAN_OPERATORS = %w(and or not).freeze

  # The sanitized query string (see ::sanitize).
  attr_reader :query_string

  # Sanitizes the given +query_string+ and takes +options+ for accessing Solr.
  #
  # +options+ is a hash (or +nil+) that is merged over
  # <tt>Configuration.search_options</tt> and may include:
  #
  # :limit:: The maximum number of hits
  # :offset:: The search offset
  # :solr_url:: A non-default Solr server URL
  # :filter_query:: See #filter_query_conditions
  # :solr_parameters:: A hash of additional query parameters (e. g. +timeAllowed+, +sort+)
  # :lazy_load:: When blank (the default), the hits load their associated
  #              Objs immediately and hits without an Obj are filtered out.
  #              Otherwise the Objs are loaded lazily (when accessed through
  #              Hit#obj). Note that lazy loading may expose hits that do
  #              not have an Obj, i.e. they return nil when Hit#obj is
  #              invoked.
  def initialize(query_string, options = nil)
    @query_string = self.class.sanitize(query_string)
    @options = Configuration.search_options.merge(options || {})
  end

  # Accesses Solr and fetches search hits.
  #
  # Sends a "select" request with the parameters built from
  # #solr_query_for_query_string, #filter_query and the +options+ given in
  # #new, and returns the search result built from the response.
  def fetch_hits
    solr = RSolr.connect(:url => @options[:solr_url])
    solr_result = solr.get("select", :params => solr_query_parameters)
    solr_response = solr_result['response']
    build_search_result(solr_response['numFound'], solr_response['docs'], solr_response['maxScore'])
  end

  # Removes unwanted characters from +text+.
  #
  # Everything except word characters and <tt>*</tt> is replaced by a space;
  # runs of whitespace are collapsed and the result is stripped.
  # +nil+ is treated like an empty string.
  #
  # <tt>\p{Word}</tt> is used instead of <tt>\w</tt> because <tt>\w</tt>
  # only matches ASCII, which would split words containing e.g. umlauts.
  def self.sanitize(text)
    text.to_s.gsub(/[^\p{Word}*]/, ' ').gsub(/\s+/, ' ').strip
  end

  # Builds the Solr query from the sanitized query string: the words are
  # downcased, the boolean operator words (+and+, +or+, +not+) are dropped
  # and the remaining words are joined with " AND ".
  #
  # Returns an empty string if no word remains.
  def solr_query_for_query_string
    words = @query_string.downcase.split(/\s+/)
    words.reject { |word| BOOLEAN_OPERATORS.include?(word) }.join(" AND ")
  end

  # Combines filter query conditions (see #filter_query_conditions).
  #
  # A filter query is used to reduce the number of documents before executing the actual query.
  # By default, all filter query conditions must be met, each is passed on as a separate filter query.
  # Conditions whose value is +nil+ are omitted.
  def filter_query
    filter_query_conditions.values.compact
  end

  # A hash of conditions, combined to a filter query by #filter_query.
  # Note that all values of the hash must be valid Solr syntax.
  # The keys have no meaning and exist only so single conditions can be replaced
  # in a subclass:
  #
  #   class SearchRequest < LuceneSearchRequest
  #     def filter_query_conditions
  #       super.merge(:subset => 'path:/de/*')
  #     end
  #   end
  #
  # Entries of the +:filter_query+ option are merged over the defaults.
  def filter_query_conditions
    now = Time.now
    # Time#to_iso is not defined in this file; it is expected to be provided
    # elsewhere in the project.
    # "now + 1" replaces the obsolete Time#succ: the range starts one second
    # after now, so only documents becoming valid in the future are excluded.
    conditions = {
      :object_type => 'NOT object_type:image',
      :suppress_export => 'NOT suppress_export:1',
      :valid_from => "NOT valid_from:[#{(now + 1).to_iso} TO *]",
      :valid_until => "NOT valid_until:[* TO #{now.to_iso}]"
    }
    conditions.merge(@options[:filter_query] || {})
  end

  private

  # The parameters of the Solr request. Entries of the +:solr_parameters+
  # option take precedence over the defaults built here.
  def solr_query_parameters
    {
      :q => solr_query_for_query_string,
      :fq => filter_query,
      :fl => 'id,score',
      :start => @options[:offset],
      :rows => @options[:limit]
    }.merge(@options[:solr_parameters] || {})
  end

  # Wraps the Solr +docs+ into an SES::SearchResult of SES::Hit objects.
  #
  # Each hit's score is the document score relative to +max_score+.
  # Unless the +:lazy_load+ option is set, hits whose Obj is blank are
  # logged and left out of the result.
  def build_search_result(total_hits, docs, max_score)
    result = SES::SearchResult.new(total_hits)
    # Loop-invariant, so it is evaluated once instead of per document.
    eager_load = @options[:lazy_load].blank?
    Array(docs).each do |doc|
      id = doc['id']
      hit = SES::Hit.new(id, relative_score(doc['score'], max_score), doc)
      if eager_load && hit.obj.blank?
        Rails.logger.warn("OBJ with ID ##{id} not found: This search result will not be shown")
      else
        result << hit
      end
    end
    result
  end

  # Returns +score+ divided by +max_score+ as a Float.
  #
  # Falls back to 0.0 when +max_score+ is missing or not positive (e.g. the
  # score field was not requested), instead of raising or producing NaN.
  # Converting to Float also avoids integer division.
  def relative_score(score, max_score)
    max = max_score.to_f
    return 0.0 unless max > 0
    score.to_f / max
  end
end
end