# encoding: UTF-8
module Scrivito
  # Provides an enumerator for iterating over obj search results and retrieving obj instances.
  # This is done using the {http://ruby-doc.org/core-1.8.7/Enumerable.html Enumerable mixin},
  # which provides methods such as map, select or take.
  #
  # This enumerator is lazy. If for example you are looking for {BasicObj Obj}s with the ObjClass "Publication",
  # and there are 93 objs in total, then enum.take(10) will fetch the first 10 objs only,
  # ignoring the other 83 objs.
  # This also means, that iterating multiple times over this enumerator causes the search results and objs to be fetched again.
  # If you want to get all objs at once, use enum.to_a.
  #
  # To start searching use one of the {BasicObj Obj} methods that returns an {ObjSearchEnumerator}.
  # The preferred way is to start with {BasicObj.where Obj.where}.
  #
  # == Currently available fields and their values
  #
  # [+:*+] Searches all fields.
  #        This is only possible with the operators +contains+ or +starts_with+.
  # [+:id+] Id of an {BasicObj Obj}. This is a +string+ field.
  # [+:_path+] Path of an {BasicObj Obj}. This is a +string+ field.
  # [+:_name+] Name of an {BasicObj Obj}. This is a +string+ field.
  # [+:title+] Title of an {BasicObj Obj}. This is a +string+ field.
  # [+:body+] Body of an {BasicObj Obj}. This is an +html+ field. Thus, only the +contains+ and
  #           +contains_prefix+ operators can be applied to this field.
  # [+:_obj_class+] ObjClass of an {BasicObj Obj}. This is a +string+ field.
  # [+:_permalink+] Permalink of an {BasicObj Obj}. This is a +string+ field.
  # [+:_last_changed+] Date of last change of an {BasicObj Obj}.
  # [every :custom_attribute] Custom attribute of an {BasicObj Obj}. Note that
  #                           depending on the attribute type (e.g. an
  #                           +html+ field), some operators can not be applied.
  #
  # All values are stored as strings.
  #
  # Date values are stored in the format YYYYMMDDHHMMSS in UTC. For example, 2000-01-01 00:00:00 UTC is stored as "20000101000000".
  # This is relevant for string comparisons using the +is_less_than+ and +is_greater_than+ operators.
  #
  # == Currently available operators
  #
  # For +:contains+ and +:contains_prefix+, the examples are based on the following field value:
  # "Behind every cloud is another cloud."
  #
  # [+:contains+] Searches for one or more whole words. Each word needs to be present.
  #
  #               Example subquery values:
  #
  #               ✔ "behind cloud" (case insensitive)
  #
  #               ✘ "behi clo" (not whole words)
  #
  #               ✘ "behind everything" (second word does not match)
  # [+:contains_prefix+] Searches for one prefix. A whole word is also a prefix.
  #
  #                      Example subquery values:
  #
  #                      ✔ "Clou" (case insensitive)
  #
  #                      ✔ "Every" (case insensitive)
  #
  # For +:equals+ and +:starts_with+, the examples are based on the following field value:
  # "Some content."
  #
  # [+:equals+] The +field+ value needs to be identical to the +value+ of this subquery.
  #
  #             Only applicable to +string+, +enum+, +multienum+ and +date+ fields.
  #
  #             Example subquery values:
  #
  #             ✔ "Some content." (exact value)
  #
  #             ✘ "Some" (not exact value)
  #
  # [+:starts_with+] The +field+ value needs to start exactly with the +value+ of this subquery.
  #
  #                  Only applicable to +string+, +enum+, +multienum+ and +date+ fields.
  #
  #                  Example subquery values:
  #
  #                  ✔ "Som" (prefix of the value)
  #
  #                  ✘ "som" (incorrect case of prefix)
  #
  #                  ✘ "content" (not prefix of the whole value)
  #
  # For +:is_less_than+ and +:is_greater_than+, the examples are based on the following field value (date string):
  # "20000101000000"
  #
  # [+:is_less_than+] Matches if the field string value is less than the subquery string value.
  #
  #                   Only applicable to +string+, +enum+, +multienum+ and +date+ fields.
  #
  #                   Example subquery values:
  #
  #                   ✔ "19991231235959" (is less than "20000101000000")
  #
  #                   ✘ "20000101000000" (equal, not less than)
  #
  # [+:is_greater_than+] Matches if the field string value is greater than the subquery string value.
  #
  #                      Only applicable to +string+, +enum+, +multienum+ and +date+ fields.
  #
  #                      Example subquery values:
  #
  #                      ✔ "20000101000001" (is greater than "20000101000000")
  #
  #                      ✘ "20000101000000" (equal, not greater than)
  #
  # @api public
  class ObjSearchEnumerator
    # Raised by {#format} when no formatter is registered under the given name.
    class UnregisteredObjFormat < StandardError; end

    include Enumerable

    # Public operator names mapped to the operator names sent in the search request.
    OPERATOR_MAPPING = {
      :contains        => :search,
      :contains_prefix => :prefix_search,
      :equals          => :equal,
      :starts_with     => :prefix,
      :is_greater_than => :greater_than,
      :is_less_than    => :less_than,
    }.freeze

    # Operators that accept a +boost+ hash in {#and}.
    BOOSTABLE_OPERATORS = [:contains, :contains_prefix].freeze

    # Operators that may be used with {#and_not}.
    NEGATABLE_OPERATORS = [:equals, :starts_with, :is_greater_than, :is_less_than].freeze

    private_constant :OPERATOR_MAPPING, :BOOSTABLE_OPERATORS, :NEGATABLE_OPERATORS

    attr_reader :workspace
    attr_reader :query

    # @param workspace the workspace to search in; its +id+ is used to build the
    #   request path and its +objs+ are used to load the hits.
    def initialize(workspace)
      @workspace = workspace
      @options = {}
    end

    # @group Chainable methods

    # Adds this additional AND subquery to this {ObjSearchEnumerator}.
    #
    # Compares the +field(s)+ with the +value(s)+ using the +operator+ of this subquery.
    # All objs to which this criterion applies remain in the result set.
    #
    # @param [Symbol, String, Array] field Name(s) of the field(s) to be searched.
    #   For arrays, the subquery matches, if one or more of these fields meet this criterion.
    # @param [Symbol, String] operator See "Currently available operators" at the top.
    # @param [String, Array] value The value(s) with which the field value(s) are compared using the +operator+ of this subquery.
    #   For arrays, the subquery matches, if the condition is met for one or more of the array elements.
    # @param [Hash] boost A hash where the keys are field names and their values are boosting factors.
    #   Boosting factors must be in the range from 1 to 10.
    #   Boosting can only be applied to subqueries in which the +contains+ or +contains_prefix+ operator is used.
    # @raise [RuntimeError] if the operator is unknown or +boost+ is given for an operator that does not support it.
    # @return [ObjSearchEnumerator]
    # @api public
    def and(field, operator, value, boost = nil)
      real_operator = operator_mapping(operator)
      subquery = {:field => field, :operator => real_operator, :value => convert_value(value)}

      if boost.present?
        unless BOOSTABLE_OPERATORS.include?(operator.to_sym)
          raise "Boost is not allowed with operator '#{operator}'. " +
            "Valid operators are: #{BOOSTABLE_OPERATORS.join(', ')}"
        end
        subquery[:boost] = boost
      end

      add_subquery(subquery)
    end

    # Adds this additional negated AND subquery to this {ObjSearchEnumerator}.
    #
    # Compares the +field(s)+ with the +value(s)+ using the negated +operator+ of this subquery.
    # All objs to which this criterion applies are removed from the result set.
    #
    # @param [Symbol, String, Array] field Name(s) of the field(s) to be searched.
    #   For arrays, the subquery matches, if one or more of these fields meet this criterion.
    # @param [Symbol, String] operator Only applicable to subqueries in which the +equals+,
    #   +starts_with+, +is_greater_than+ and +is_less_than+ operator is used
    #   (See "Currently available operators" at the top).
    # @param [String, Array] value The value(s) with which the field value(s) are compared using the +operator+ of this subquery.
    #   For arrays, the subquery matches, if the condition is met for one or more of the array elements.
    # @raise [RuntimeError] if the operator is unknown or cannot be negated.
    # @return [ObjSearchEnumerator]
    # @api public
    def and_not(field, operator, value)
      real_operator = operator_mapping(operator)
      unless NEGATABLE_OPERATORS.include?(operator.to_sym)
        raise "Negating operator '#{operator}' is not valid."
      end

      add_subquery(:field => field, :operator => real_operator,
                   :value => convert_value(value), :negate => true)
    end

    # Orders the results by +field_name+.
    # @param [Symbol, String] field_name This parameter determines by which field the hits are sorted (e.g. +:_path+).
    # @return [ObjSearchEnumerator]
    # @api public
    def order(field_name)
      options[:sort_by] = field_name
      self
    end

    # Reverses the order of the results. Requires {#order} to be specified before.
    # Calling it twice restores the original order.
    # @raise [RuntimeError] if no sort field has been set.
    # @return [ObjSearchEnumerator]
    # @api public
    def reverse_order
      unless options[:sort_by].present?
        raise "A search order has to be specified"\
          " before reverse_order can be applied."
      end

      @reverse_order = !@reverse_order
      self
    end

    # Number of search results to be returned by each of the internal search requests.
    # The default is +10+. The server may limit a large batch size to a reasonable value.
    # @param [Integer] size A value in the range from +1+ to +100+.
    # @return [ObjSearchEnumerator]
    # @api public
    def batch_size(size)
      options[:size] = size
      self
    end

    # Omits the first +amount+ number of {BasicObj Obj}s from the results. The default is +0+.
    # Repeated calls add up.
    # @param [Integer] amount
    # @return [ObjSearchEnumerator]
    # @api public
    def offset(amount)
      options[:offset] = (options[:offset] || 0) + amount
      self
    end

    # Requests that the search is sent with the +include_deleted+ option.
    # @return [ObjSearchEnumerator]
    def include_deleted
      @include_deleted = true
      # The option is part of the size request, so a total cached earlier is stale.
      @size = nil
      self
    end

    # @!endgroup

    # Iterates over the search result, yielding {BasicObj Obj}.
    # Hits are fetched batch by batch; the next batch is requested only once the
    # current one has been consumed.
    # @yield [Obj]
    # @return [void, Enumerator] an Enumerator if no block is given.
    # @api public
    def each
      return enum_for(:each) unless block_given?

      offset = options[:offset] || 0
      current_batch, total = fetch_next_batch(offset)
      loop do
        if current_batch.empty?
          break if offset >= total

          current_batch, total = fetch_next_batch(offset)
          # The backend reports more hits but none could be loaded: stop instead
          # of yielding nil (and issuing one request) for every remaining offset.
          break if current_batch.empty?
        end

        offset += 1
        yield current_batch.shift
      end
    end

    # The total number of hits.
    # The value is cached until the query changes; fetching a batch refreshes it.
    # @return [Integer]
    # @api public
    def size
      return @size if @size

      size_query = { query: query, size: 0 }
      size_query[:options] = { include_deleted: true } if @include_deleted

      @size = CmsRestApi.get(resource_path, size_query)['total'].to_i
    end

    # load a single batch of search results from the backend.
    # returns an array with one entry per loaded obj: the obj's id by default,
    # or the result of the formatter selected with {#format}.
    # will usually return `batch_size` results if available,
    # but may occasionally return fewer than `batch_size` results (due to rate limit, for example).
    # @api public
    def load_batch
      objs, _total = fetch_next_batch(options[:offset] || 0)
      formatter = @formatter || ->(obj) { obj.id }
      objs.map { |obj| formatter.call(obj) }
    end

    # Selects the formatter that {#load_batch} applies to every obj.
    # @param name key of the formatter in +Configuration.obj_formats+.
    # @raise [UnregisteredObjFormat] if no formatter is registered under +name+.
    #   A previously selected formatter stays active in that case.
    # @return [ObjSearchEnumerator]
    def format(name)
      formatter = Configuration.obj_formats[name]
      unless formatter
        raise UnregisteredObjFormat, "The format with name '#{name}' is not registered."
      end

      @formatter = formatter
      self
    end

    # @api public
    alias_method :length, :size

    # @api public
    alias_method :count, :size

    private

    attr_reader :options

    # Appends +subquery+ to the query and drops the cached total, which no longer applies.
    def add_subquery(subquery)
      @size = nil
      @query = (query || []) + [subquery]
      self
    end

    # Converts a single value or every element of an array of values.
    def convert_value(value)
      if value.kind_of?(Array)
        value.map { |v| convert_single_value(v) }
      else
        convert_single_value(value)
      end
    end

    # Passes Time and Date values through the attribute serializer; other values are returned unchanged.
    def convert_single_value(value)
      if value.is_a?(Time) || value.is_a?(Date)
        CmsRestApi::AttributeSerializer.convert_time(value)
      else
        value
      end
    end

    # Translates a public operator name into the name used in the search request.
    # Raises a RuntimeError for unknown operators.
    def operator_mapping(operator)
      OPERATOR_MAPPING.fetch(operator.to_sym) do
        raise "Operator '#{operator}' is not valid!"
      end
    end

    # Requests the hits starting at +offset+ and loads the matching objs.
    # Returns the pair [objs, total] and caches the total in @size.
    def fetch_next_batch(offset)
      request_result = CmsRestApi.get(resource_path, search_dsl(offset))
      obj_ids = request_result['results'].map { |result| result['id'] || result['_id'] }
      objs = workspace.objs.find_including_deleted(obj_ids)
      @size = request_result['total'].to_i

      [objs, @size]
    end

    # Path of the search resource of the current workspace.
    def resource_path
      "workspaces/#{workspace.id}/objs/search"
    end

    # Builds the request parameters: the collected options overlaid with the
    # offset, the query and, if set, the sort order and the include_deleted option.
    def search_dsl(offset)
      patches = {
        offset: offset,
        query: query,
      }
      if @reverse_order
        # Reversed order is :desc when a sort field is set, :asc otherwise.
        patches[:sort_order] = options[:sort_by].present? ? :desc : :asc
      end
      if @include_deleted
        patches[:options] = { include_deleted: true }
      end

      options.merge(patches)
    end
  end
end