# encoding: UTF-8
module Scrivito
# Provides an enumerator for iterating over the results of searches for CMS objects to retrieve instances of these objects.
# This is achieved through the {http://ruby-doc.org/core-2.1.3/Enumerable.html Enumerable mixin},
# which provides methods such as <code>map</code>, <code>select</code> or <code>take</code>.
#
# This enumerator is lazy. If, for example, you are looking for {BasicObj Obj}s whose object class is "Publication",
# and there are 93 objects in total, then <code>enum.take(10)</code> fetches the first 10 objects only,
# ignoring the other 83.
# This implies that repeatedly iterating over this enumerator causes the search results and the objects to be fetched again and again.
# If you want to get all objects at once, use <code>enum.to_a</code>.
#
# To start searching, use one of the {BasicObj Obj} methods that return an {ObjSearchEnumerator}. The preferred way is to start with {BasicObj.where Obj.where}.
#
# == Currently available fields and their values
#
# [+:*+] Searches all fields.
# This is only possible with the +contains+ and +starts_with+ operators.
# [+:id+] Id of an {BasicObj Obj}. This is a +string+ field.
# [+:_path+] Path of an {BasicObj Obj}. This is a +string+ field.
# [+:_name+] Name of an {BasicObj Obj}. This is a +string+ field.
# [+:title+] Title of an {BasicObj Obj}. This is a +string+ field.
# [+:body+] Body of an {BasicObj Obj}. This is an +html+ field. Thus, only the +contains+ and
# +contains_prefix+ operators can be applied to this field.
# [+:_obj_class+] Object class of an {BasicObj Obj}. This is a +string+ field.
# [+:_permalink+] Permalink of an {BasicObj Obj}. This is a +string+ field.
# [+:_last_changed+] Date of last change of an {BasicObj Obj}.
# [every <code>:custom_attribute</code>] Custom attribute of an {BasicObj Obj}. Note that
# depending on the attribute type (e.g. an
# +html+ field), some operators cannot be applied.
#
# All values are stored as strings.
#
# Date values are stored in the format YYYYMMDDHHMMSS in UTC. For example, 2000-01-01 00:00:00 UTC is stored as "<code>20000101000000</code>".
# This is relevant for string comparisons in which the +is_less_than+ and +is_greater_than+ operators are used.
#
# == Currently available operators
#
# For +:contains+ and +:contains_prefix+, the examples are based on the following field value:
# "Behind every cloud is another cloud."
#
# [+:contains+] Searches for one or more whole words. Each word needs to be present.
#
# Example subquery values:
#
# ✔ "behind cloud" (case insensitive)
#
# ✘ "behi clo" (not whole words)
#
# ✘ "behind everything" (second word does not match)
# [+:contains_prefix+] Searches for one prefix. A whole word also counts as a prefix.
#
# Example subquery values:
#
# ✔ "Clou" (case insensitive)
#
# ✔ "Every" (case insensitive)
#
# For +:equals+ and +:starts_with+, the examples are based on the following field value:
# "Some content."
#
# [+:equals+] The +field+ value needs to be identical to the +value+ of this subquery.
#
# Only applicable to +string+, +enum+, +multienum+ and +date+ fields.
#
# Example subquery values:
#
# ✔ "Some content." (exact value)
#
# ✘ "Some" (not exact value)
#
# [+:starts_with+] The +field+ value needs to start exactly with the +value+ of this subquery.
#
# Only applicable to +string+, +enum+, +multienum+ and +date+ fields.
#
# Example subquery values:
#
# ✔ "Som" (prefix of the value)
#
# ✘ "som" (incorrect case of prefix)
#
# ✘ "content" (not prefix of the whole value)
#
# For +:is_less_than+ and +:is_greater_than+, the examples are based on the following field value (date string):
# "20000101000000"
#
# [+:is_less_than+] Matches if the field string value is less than the subquery string value.
#
# Only applicable to +string+, +enum+, +multienum+ and +date+ fields.
#
# Example subquery values:
#
# ✔ "19991231235959" (is less than "20000101000000")
#
# ✘ "20000101000000" (equal, not less than)
#
# [+:is_greater_than+] Matches if the field string value is greater than the subquery string value.
#
# Only applicable to +string+, +enum+, +multienum+ and +date+ fields.
#
# Example subquery values:
#
# ✔ "20000101000001" (is greater than "20000101000000")
#
# ✘ "20000101000000" (equal, not greater than)
#
# @api public
class ObjSearchEnumerator
  include Enumerable

  # Public operator names (as accepted by {#and} and {#and_not}) mapped to the
  # operator names that are put into the subqueries sent to the backend.
  OPERATOR_MAPPING = {
    contains: :search,
    contains_prefix: :prefix_search,
    equals: :equal,
    starts_with: :prefix,
    is_greater_than: :greater_than,
    is_less_than: :less_than
  }.freeze

  # Operators for which a +boost+ may be passed to {#and}.
  BOOSTABLE_OPERATORS = [:contains, :contains_prefix].freeze

  # Operators that may be negated using {#and_not}.
  NEGATABLE_OPERATORS = [:equals, :starts_with, :is_greater_than, :is_less_than].freeze

  private_constant :OPERATOR_MAPPING, :BOOSTABLE_OPERATORS, :NEGATABLE_OPERATORS

  # The workspace this enumerator searches in, as passed to {#initialize}.
  attr_reader :workspace

  # The list of subqueries added so far. It is +nil+ until the first subquery
  # has been added using {#and} or {#and_not}.
  attr_reader :query

  # @param workspace the workspace to search in. It is handed to the backend with
  #   every search request and is asked for the objects belonging to the hits.
  def initialize(workspace)
    @workspace = workspace
    @options = {}
    @query = nil
    @size = nil
    @reverse_order = false
    @include_deleted = false
  end

  # @group Chainable methods

  # Adds the given AND subquery to this {ObjSearchEnumerator}.
  #
  # Compares the +field(s)+ with the +value(s)+ using the +operator+ of this subquery.
  # All CMS objects to which this criterion applies remain in the result set.
  #
  # @param [Symbol, String, Array] field Name(s) of the field(s) to be searched.
  #   For arrays, the subquery matches if one or more of these fields meet this criterion.
  # @param [Symbol, String] operator See "Currently available operators" above.
  # @param [String, Array] value The value(s) to compare with the field value(s) using the +operator+ of this subquery.
  #   For arrays, the subquery matches if the condition is met for one or more of the array elements.
  # @param [Hash] boost A hash where the keys are field names and their values are boosting factors.
  #   Boosting factors must be in the range from 1 to 10.
  #   Boosting can only be applied to subqueries in which the +contains+ or +contains_prefix+ operator is used.
  # @raise [RuntimeError] if the +operator+ is unknown, or if a +boost+ is given
  #   for an operator that does not support boosting.
  # @return [ObjSearchEnumerator]
  # @api public
  def and(field, operator, value, boost = nil)
    subquery = {
      field: field,
      operator: operator_mapping(operator),
      value: convert_value(value)
    }

    if boost.present?
      unless BOOSTABLE_OPERATORS.include?(operator.to_sym)
        raise "Boost is not allowed with operator '#{operator}'. " \
          "Valid operators are: #{BOOSTABLE_OPERATORS.join(', ')}"
      end
      subquery[:boost] = boost
    end

    add_subquery(subquery)
  end

  # Adds the given negated AND subquery to this {ObjSearchEnumerator}.
  #
  # Compares the +field(s)+ with the +value(s)+ using the negated +operator+ of this subquery.
  # All CMS objects to which this criterion applies are removed from the result set.
  #
  # @param [Symbol, String, Array] field Name(s) of the field(s) to be searched.
  #   For arrays, the subquery matches if one or more of these fields meet this criterion.
  # @param [Symbol, String] operator Only applicable to subqueries in which the +equals+,
  #   +starts_with+, +is_greater_than+ or +is_less_than+ operator is used.
  #   (See "Currently available operators" above).
  # @param [String, Array] value The value(s) to compare with the field value(s) using the +operator+ of this subquery.
  #   For arrays, the subquery matches if the condition is met for one or more of the array elements.
  # @raise [RuntimeError] if the +operator+ is unknown or cannot be negated.
  # @return [ObjSearchEnumerator]
  # @api public
  def and_not(field, operator, value)
    # Unknown operators are reported first, then operators that are known but not negatable.
    real_operator = operator_mapping(operator)
    unless NEGATABLE_OPERATORS.include?(operator.to_sym)
      raise "Negating operator '#{operator}' is not valid."
    end

    add_subquery(
      field: field,
      operator: real_operator,
      value: convert_value(value),
      negate: true
    )
  end

  # Orders the results by +field_name+.
  # @param [Symbol, String] field_name This parameter specifies the field by which the hits are sorted (e.g. +:_path+).
  # @return [ObjSearchEnumerator]
  # @api public
  def order(field_name)
    options[:sort_by] = field_name
    self
  end

  # Reverses the order of the results. Requires {#order} to be applied before.
  # Calling this method a second time restores the previous order.
  # @raise [RuntimeError] if no search order has been specified yet.
  # @return [ObjSearchEnumerator]
  # @api public
  def reverse_order
    unless options[:sort_by].present?
      raise "A search order has to be specified before reverse_order can be applied."
    end

    @reverse_order = !@reverse_order
    self
  end

  # Number of search results to be returned by each of the internal search requests.
  # The default is +10+. The server may reduce large batches to a reasonable size.
  # @param [Integer] size A value in the range from +1+ to +100+.
  # @return [ObjSearchEnumerator]
  # @api public
  def batch_size(size)
    options[:size] = size
    self
  end

  # Omits the first +amount+ of {BasicObj Obj}s from the results. The default is +0+.
  # Repeated calls add up: <code>offset(1).offset(2)</code> omits the first three hits.
  # @param [Integer] amount
  # @return [ObjSearchEnumerator]
  # @api public
  def offset(amount)
    options[:offset] = (options[:offset] || 0) + amount
    self
  end

  # Passes the +include_deleted+ option to the backend with all subsequent
  # search and size requests.
  # @return [ObjSearchEnumerator]
  def include_deleted
    # A total memoized without this option no longer applies.
    @size = nil
    @include_deleted = true
    self
  end

  # @!endgroup

  # Iterates over the search result, yielding {BasicObj Obj}.
  #
  # The hits are fetched batch by batch; the next batch is only requested
  # after the current one has been consumed. Without a block, an Enumerator
  # is returned and no request is made until it is consumed.
  # @yield [Obj]
  # @return [void]
  # @api public
  def each
    return enum_for(:each) unless block_given?

    position = options[:offset] || 0
    current_batch, total = fetch_next_batch(position)

    loop do
      if current_batch.empty?
        break if position >= total

        current_batch, total = fetch_next_batch(position)
        # The backend delivered no hits although the reported total has not
        # been reached. Stop here instead of yielding nil.
        break if current_batch.empty?
      end

      position += 1
      yield current_batch.shift
    end
  end

  # The total number of hits.
  #
  # The value is memoized. It is discarded whenever a subquery is added or
  # {#include_deleted} is called, and refreshed by every fetched batch.
  # @return [Integer]
  # @api public
  def size
    @size ||= begin
      # size: 0 asks for the total only, without any hits.
      size_query = { query: query, size: 0 }
      size_query[:options] = { include_deleted: true } if @include_deleted
      CmsBackend.instance.search_objs(workspace, size_query)['total'].to_i
    end
  end

  # Loads a single batch of search results from the backend.
  # @return [Array] of {BasicObj Obj}.
  #   Usually returns +batch_size+ results if available,
  #   but may occasionally return fewer than +batch_size+ results (due to rate limit, for example).
  # @api public
  def load_batch
    objs, _total = fetch_next_batch(options[:offset] || 0)
    objs
  end

  # @api public
  alias_method :length, :size

  # @api public
  alias_method :count, :size

  private

  # Request options collected by the chainable methods (+:sort_by+, +:size+, +:offset+).
  attr_reader :options

  # Appends +subquery+ to the query and discards the memoized total,
  # which no longer applies to the extended query.
  # @return [ObjSearchEnumerator] self, so the public methods stay chainable.
  def add_subquery(subquery)
    @size = nil
    @query = (query || []) + [subquery]
    self
  end

  # Converts a single value or each element of an array of values
  # using {#convert_single_value}.
  def convert_value(value)
    if value.kind_of?(Array)
      value.map { |single_value| convert_single_value(single_value) }
    else
      convert_single_value(value)
    end
  end

  # Serializes +Time+ and +Date+ values via +DateAttribute.serialize+;
  # every other value is converted using +to_s+.
  def convert_single_value(value)
    case value
    when Time, Date
      DateAttribute.serialize(value)
    else
      value.to_s
    end
  end

  # Translates a public operator name into the name used in the subqueries.
  # @raise [RuntimeError] if the operator is unknown.
  def operator_mapping(operator)
    OPERATOR_MAPPING.fetch(operator.to_sym) do
      raise "Operator '#{operator}' is not valid!"
    end
  end

  # Requests the hits starting at +offset+ from the backend and asks the
  # workspace for the corresponding objects.
  # As a side effect, the memoized total is refreshed from the response.
  # @return [Array] a pair consisting of the objects and the total number of hits.
  def fetch_next_batch(offset)
    request_result = CmsBackend.instance.search_objs(workspace, search_dsl(offset))
    # A hit carries its id either under 'id' or under '_id'.
    obj_ids = request_result['results'].map { |result| result['id'] || result['_id'] }
    objs = workspace.objs.find_including_deleted(obj_ids)
    @size = request_result['total'].to_i

    [objs, @size]
  end

  # Builds the parameters of a search request starting at +offset+: the
  # collected options, overridden by the given offset and the current query.
  def search_dsl(offset)
    patches = { offset: offset, query: query }

    if @reverse_order
      patches[:sort_order] = options[:sort_by].present? ? :desc : :asc
    end
    patches[:options] = { include_deleted: true } if @include_deleted

    options.merge(patches)
  end
end
end