# encoding: UTF-8
module Scrivito
  # Provides an enumerator for iterating over the results of searches for CMS objects to retrieve
  # instances of these objects. This is achieved through the
  # {http://ruby-doc.org/core-2.1.3/Enumerable.html +Enumerable+ mixin}, which provides methods such
  # as +map+, +select+ or +take+.
  #
  # This enumerator is lazy. If, for example, you are looking for {Scrivito::BasicObj Obj}s whose
  # object class is +Publication+, and there are 93 objects in total, then +enum.take(10)+ fetches
  # the first 10 objects only, ignoring the other 83.
  # This implies that repeatedly iterating over this enumerator causes the search results and the
  # objects to be fetched again and again. If you want to get all objects at once, use +enum.to_a+.
  #
  # To start searching, use one of the {Scrivito::BasicObj Obj} methods that return an
  # {Scrivito::ObjSearchEnumerator}. The preferred way is to start with
  # {Scrivito::BasicObj.where Obj.where}.
  #
  # == Currently available fields and their values
  #
  # [+:*+] Searches all fields.
  #        This is only possible with the +contains+ and +starts_with+ operators.
  # [+:id+] Id of an {Scrivito::BasicObj Obj}. This is a +string+ field.
  # [+:_path+] Path of an {Scrivito::BasicObj Obj}. This is a +string+ field.
  # [+:_name+] Name of an {Scrivito::BasicObj Obj}. This is a +string+ field.
  # [+:_obj_class+] Object class of an {Scrivito::BasicObj Obj}. This is a +string+ field.
  # [+:_permalink+] Permalink of an {Scrivito::BasicObj Obj}. This is a +string+ field.
  # [+:_last_changed+] Date of last change of an {Scrivito::BasicObj Obj}.
  # [every +_:custom_attribute_+] Custom attribute of an {Scrivito::BasicObj Obj}. Note that depending on the attribute type (e.g. an +html+ field), some operators cannot be applied.
  #
  # == Currently available operators
  #
  # === +contains+ and +contains_prefix+
  #
  # These operators are intended for full text search of natural language texts.
  # They are applicable to +string+, +stringlist+, +enum+, +multienum+ and +html+ fields.
  #
  # For +contains+ and +contains_prefix+, the examples are based on the following field value:
  # "Behind every cloud is another cloud."
  #
  # [+:contains+] Searches for one or more whole words. Each word needs to be present.
  #
  #               Example subquery values:
  #
  #               ✔ "behind cloud" (case insensitive)
  #
  #               ✘ "behi clo" (not whole words)
  #
  #               ✘ "behind everything" (second word does not match)
  # [+:contains_prefix+] Searches for a word prefix.
  #
  #                      Example subquery values:
  #
  #                      ✔ "Clou" (case insensitive)
  #
  #                      ✔ "Every" (case insensitive)
  #
  # === +equals+ and +starts_with+
  #
  # These operators are intended for programmatic comparisons of string and date values.
  #
  # The +equals+ and +starts_with+ operators have some limits with regard to string length.
  # String values are only guaranteed to be considered if they are at most 1000 characters in length.
  # String values of more than 1000 characters may be ignored by these operators.
  #
  # The +starts_with+ operator also has a precision limit:
  # Only prefixes of up to 20 characters are guaranteed to be matched.
  # If you supply a prefix of more than 20 characters, the additional characters may be ignored.
  #
  # When combined with the system attribute +_path+, the operator +starts_with+ has some special functionality:
  # There is no precision limit, i.e. a prefix of arbitrary length may be used to match on +_path+.
  # Also, prefix matching on +_path+ automatically matches entire path components,
  # i.e. the prefix matching is delimited by slashes (the character +'/'+).
  #
  # For +equals+ and +starts_with+, the examples are based on the following field value:
  # "Some content."
  #
  # [+:equals+] The +field+ value needs to be identical to the +value+ of this subquery.
  #
  #             Applicable to +string+, +stringlist+, +enum+, +multienum+ and +date+ fields.
  #
  #             Example subquery values:
  #
  #             ✔ "Some content." (exact value)
  #
  #             ✘ "Some" (not exact value)
  #
  # [+:starts_with+] The +field+ value needs to start exactly with the +value+ of this subquery.
  #
  #                  Applicable to +string+, +stringlist+, +enum+ and +multienum+ fields.
  #
  #                  Example subquery values:
  #
  #                  ✔ "Som" (prefix of the value)
  #
  #                  ✘ "som" (incorrect case of prefix)
  #
  #                  ✘ "content" (not prefix of the whole value)
  #
  # === +is_less_than+ and +is_greater_than+
  #
  # These operators are intended for comparisons on +date+ attributes and on numerical metadata, for example the width of an image.
  #
  # For +is_less_than+ and +is_greater_than+, the examples are based on the following date value:
  # +Time.new(2000,01,01,00,00,00)+
  #
  # [+:is_less_than+] Matches if the field value is less than the subquery string value.
  #
  #                   Example subquery values:
  #
  #                   ✔ +Time.new(1999,12,31,23,59,59)+ (is less than)
  #
  #                   ✘ +Time.new(2000,01,01,00,00,00)+ (equal, not less than)
  #
  # [+:is_greater_than+] Matches if the field value is greater than the subquery string value.
  #
  #                      Example subquery values:
  #
  #                      ✔ +Time.new(2000,01,01,00,00,01)+ (is greater than)
  #
  #                      ✘ +Time.new(2000,01,01,00,00,00)+ (equal, not greater than)
  #
  # == Matching +multienum+ and +stringlist+
  #
  # Attributes of type +multienum+ and +stringlist+ contain an array of strings.
  # Each of these strings is searched individually.
  # A search query matches a +multienum+ or +stringlist+, if at least one string in the list matches.
  # Example: A query using the operator +:equals+ and the value +"Eggs"+ matches an Obj containing +["Spam","Eggs"]+ in a +stringlist+ or +multienum+ attribute.
  #
  # @api public
  class ObjSearchEnumerator
    include Enumerable

    # Maps the public operator names to the operator names sent to the backend.
    OPERATOR_MAPPING = {
      contains: :search,
      contains_prefix: :prefix_search,
      equals: :equal,
      starts_with: :prefix,
      is_greater_than: :greater_than,
      is_less_than: :less_than,
    }.freeze

    # Public operators for which a +boost+ may be given (see {#and}).
    VALID_BOOST_OPERATORS = [:contains, :contains_prefix].freeze

    # Public operators that may be negated (see {#and_not}).
    VALID_NEGATED_OPERATORS = [:equals, :starts_with, :is_greater_than, :is_less_than].freeze

    private_constant :OPERATOR_MAPPING, :VALID_BOOST_OPERATORS, :VALID_NEGATED_OPERATORS

    attr_reader :workspace
    attr_reader :query

    def initialize(workspace)
      @workspace = workspace
      @options = {}
    end

    # @group Chainable methods

    # Adds the given AND subquery to this {Scrivito::ObjSearchEnumerator}.
    #
    # Compares the +field(s)+ with the +value(s)+ using the +operator+ of this subquery.
    # All CMS objects to which this criterion applies remain in the result set.
    #
    # @param [Symbol, String, Array] field Name(s) of the field(s) to be searched.
    #   For arrays, the subquery matches if one or more of these fields meet this criterion.
    # @param [Symbol, String] operator See "Currently available operators" above.
    # @param [String, Date, Time, Array] value The value(s) to compare with the field value(s) using the
    #   +operator+ of this subquery. For arrays, the subquery matches if the condition is met for
    #   one or more of the array elements.
    # @param [Hash] boost A hash where the keys are field names and their values are boosting
    #   factors. Boosting factors must be in the range from 1 to 10. Boosting can only be applied to
    #   subqueries in which the +contains+ or +contains_prefix+ operator is used.
    # @raise [RuntimeError] If the +operator+ is unknown, or if +boost+ is given for an operator
    #   other than +contains+ or +contains_prefix+.
    # @return [Scrivito::ObjSearchEnumerator]
    # @api public
    def and(field, operator, value, boost = nil)
      subquery = {
        field: field,
        operator: operator_mapping(operator),
        value: convert_value(value),
      }

      if boost.present?
        unless VALID_BOOST_OPERATORS.include?(operator.to_sym)
          raise "Boost is not allowed with operator '#{operator}'. " \
            "Valid operators are: #{VALID_BOOST_OPERATORS.join(', ')}"
        end
        subquery[:boost] = boost
      end

      add_subquery(subquery)
    end

    # Adds the given negated AND subquery to this {Scrivito::ObjSearchEnumerator}.
    #
    # Compares the +field(s)+ with the +value(s)+ using the negated +operator+ of this subquery.
    # All CMS objects to which this criterion applies are removed from the result set.
    #
    # @param [Symbol, String, Array] field Name(s) of the field(s) to be searched.
    #   For arrays, the subquery matches if one or more of these fields meet this criterion.
    # @param [Symbol, String] operator Only applicable to subqueries in which the +equals+,
    #   +starts_with+, +is_greater_than+ or +is_less_than+ operator is used.
    #   (See "Currently available operators" above).
    # @param [String, Date, Time, Array] value The value(s) to compare with the field value(s) using the
    #   +operator+ of this subquery. For arrays, the subquery matches if the condition is met for
    #   one or more of the array elements.
    # @raise [RuntimeError] If the +operator+ is unknown or cannot be negated.
    # @return [Scrivito::ObjSearchEnumerator]
    # @api public
    def and_not(field, operator, value)
      real_operator = operator_mapping(operator)
      unless VALID_NEGATED_OPERATORS.include?(operator.to_sym)
        raise "Negating operator '#{operator}' is not valid."
      end

      add_subquery(
        field: field,
        operator: real_operator,
        value: convert_value(value),
        negate: true
      )
    end

    # Orders the results by +field_name+.
    #
    # Applicable to the attribute types +string+, +enum+ and +date+.
    #
    # There is a precision limit when sorting string values:
    # Only the first 50 characters of a string are guaranteed to be considered when sorting search results.
    #
    # @param [Symbol, String] field_name This parameter specifies the field by which the hits are
    #   sorted (e.g. +:_path+).
    # @return [Scrivito::ObjSearchEnumerator]
    # @api public
    def order(field_name)
      options[:sort_by] = field_name
      self
    end

    # Reverses the order of the results. Requires {#order} to be applied before.
    # @raise [RuntimeError] If no search order has been specified yet.
    # @return [Scrivito::ObjSearchEnumerator]
    # @api public
    def reverse_order
      unless options[:sort_by].present?
        raise "A search order has to be specified before reverse_order can be applied."
      end

      @reverse_order = !@reverse_order
      self
    end

    # Number of search results to be returned by each of the internal search requests.
    # The default is +10+.
    #
    # Scrivito makes a best effort to return the given number of search results,
    # but may under certain circumstances return larger or smaller batches due to technical
    # reasons.
    #
    # @param [Integer] size A value in the range from +1+ to +100+.
    # @return [Scrivito::ObjSearchEnumerator]
    # @api public
    def batch_size(size)
      options[:size] = size
      self
    end

    # Omits the first +amount+ of {Scrivito::BasicObj Obj}s from the results. The default is +0+.
    # Repeated calls accumulate, i.e. the amounts are added up.
    # @param [Integer] amount
    # @return [Scrivito::ObjSearchEnumerator]
    # @api public
    def offset(amount)
      options[:offset] = (options[:offset] || 0) + amount
      self
    end

    # Requests deleted objects to be included in the search results.
    # @return [Scrivito::ObjSearchEnumerator]
    def include_deleted
      @include_deleted = true
      # The result set changes, so a previously memoized total is no longer valid.
      @size = nil
      self
    end

    # @!endgroup

    # Iterates over the search result, yielding {Scrivito::BasicObj Obj}.
    #
    # Batches are fetched from the backend on demand. Iteration ends as soon as the reported
    # total has been reached or the backend delivers an empty batch.
    #
    # @yield [Obj]
    # @return [void, Enumerator] an +Enumerator+ if no block is given.
    # @api public
    def each
      return enum_for(:each) unless block_given?

      offset = options[:offset] || 0
      current_batch, total = fetch_next_batch(offset)

      loop do
        if current_batch.empty?
          break if offset >= total

          current_batch, total = fetch_next_batch(offset)
          # The total is only an approximation: if the backend has nothing more to deliver,
          # stop instead of yielding nil for the "missing" hits.
          break if current_batch.empty?
        end

        offset += 1
        yield current_batch.shift
      end
    end

    # The total number of hits.
    #
    # This number is an approximation. Scrivito makes a best effort to deliver
    # the exact number of hits. But due to technical reasons, the returned number may differ
    # from the actual number under certain circumstances.
    #
    # The value is memoized until the query is changed.
    #
    # @return [Integer]
    # @api public
    def size
      @size ||= begin
        size_query = { query: query, size: 0 }
        size_query[:options] = { include_deleted: true } if @include_deleted

        CmsBackend.instance.search_objs(workspace, size_query)['total'].to_i
      end
    end

    # Loads a single batch of search results from the backend.
    # @return [Array] of {Scrivito::BasicObj Obj}.
    #   Usually returns +batch_size+ results if available,
    #   but may occasionally return more or fewer than +batch_size+ results
    #   (due to technical reasons). If you need an exact number of hits, use
    #   methods from +Enumerable+, for example +take+.
    # @api public
    def load_batch
      objs, _total = fetch_next_batch(options[:offset] || 0)
      objs
    end

    # @api public
    alias_method :length, :size

    # @api public
    alias_method :count, :size

    # @api public beta
    # Perform a faceted search over an attribute to retrieve structured results for individual values of this attribute.
    #
    # Applicable to attribute types +string+, +stringlist+, +enum+, +multienum+.
    #
    # Please note that there is a precision limit for faceting:
    # Only the first 50 characters of a string are guaranteed to be considered for faceting.
    # If two string values have the same first 50 characters, they may be grouped into the same facet value.
    #
    # @param [String] attribute_name the name of an attribute
    # @param [Hash] options the options to facet a request with.
    # @option options [Integer] :limit maximum number of unique values to return. Defaults to 20.
    # @option options [Integer] :include_objs number of Objs to fetch for each unique value. Defaults to 0.
    #
    # @return [Array]
    #   A list of unique values that were found for the given attribute name. The list is
    #   ordered by frequency, i.e. values occurring more frequently come first.
    #
    # @example Faceted request: colors of _big_ balloons.
    #   facets = Balloon.where(:size, :equals, "big").facet("color")
    #
    #   # Big balloons come in 3 colors:
    #   facets.count #=> 3
    #
    #   # There are 3 big red balloons:
    #   red_balloons = facets.first
    #   red_balloons.name #=> "red"
    #   red_balloons.count #=> 3
    #
    #   # There are 2 big green balloons:
    #   green_balloons = facets.second
    #   green_balloons.name #=> "green"
    #   green_balloons.count #=> 2
    #
    #   # There is 1 big blue balloon:
    #   blue_balloons = facets.third
    #   blue_balloons.name #=> "blue"
    #   blue_balloons.count #=> 1
    #
    # @example Faceted request with limit: at most 2 colors of big balloons.
    #   facets = Balloon.where(:size, :equals, "big").facet("color", limit: 2)
    #
    #   # Although there are 3 different colors of big balloons,
    #   # only the first 2 colors will be taken into account.
    #   facets.count # => 2
    #
    # @example Faceted request with included Objs.
    #   facets = Balloon.where(:size, :equals, "big").facet("color", include_objs: 2)
    #
    #   facets.each do |facet|
    #     facet.included_objs.each do |obj|
    #       puts "#{obj.size} #{obj.color} #{obj.class}"
    #     end
    #   end
    #
    #   # If there are 3 big red balloons, 2 big green balloons and 1 big blue balloon,
    #   # then this will produce:
    #
    #   "big red Balloon"
    #   "big red Balloon"
    #   "big green Balloon"
    #   "big green Balloon"
    #   "big blue Balloon"
    #
    # @raise [Scrivito::ClientError] If the maximum number of results has been exceeded.
    #   The maximum number of results is limited to 100 with respect to the facets themselves and the included objs.
    #
    def facet(attribute_name, options = {})
      facets_params = [{ attribute: attribute_name }.merge(options)]
      get_facet_value_objs(facets_params, [attribute_name])[attribute_name]
    end

    private

    attr_reader :options

    # Appends +subquery+ to the query and invalidates the memoized total.
    # Returns +self+ so that the public query methods stay chainable.
    def add_subquery(subquery)
      @size = nil
      @query = (query || []) + [subquery]
      self
    end

    # Converts a single value or each element of an array of values for the backend.
    def convert_value(value)
      if value.kind_of?(Array)
        value.map { |single_value| convert_single_value(single_value) }
      else
        convert_single_value(value)
      end
    end

    # Serializes +Time+ and +Date+ values via +DateAttribute+; everything else is stringified.
    def convert_single_value(value)
      case value
      when Time, Date
        DateAttribute.serialize_for_backend(value)
      else
        value.to_s
      end
    end

    # Builds one +ObjFacetValue+ per facet hash. The included objs of a facet value are those
    # members of +obj_collection+ whose id is listed in the facet's "results", without duplicates.
    def create_facet_value_objs(facet_arrays, obj_collection)
      facet_arrays.map do |facet|
        included_ids = get_objs_facet_ids(facet)
        included_objs = obj_collection.each_with_object([]) do |basic_obj, objs|
          next unless included_ids.include?(basic_obj.id)

          objs << basic_obj unless objs.include?(basic_obj)
        end

        ObjFacetValue.new(facet["value"], facet["total"], included_objs)
      end
    end

    # Collects the obj ids listed in the "results" of all given facet hashes.
    def get_all_facets_ids(facet_array)
      facet_array.flat_map { |facet| get_objs_facet_ids(facet) }
    end

    # Returns the obj ids listed in the "results" of a single facet hash (empty if there are none).
    def get_objs_facet_ids(facet)
      (facet["results"] || []).map { |obj| obj["id"] }
    end

    # Performs the facet request and returns a hash that maps each name in +attributes_list+ to
    # the list of +ObjFacetValue+ built from the facet result at the same position.
    def get_facet_value_objs(facets_params, attributes_list = [])
      params = { facets: facets_params }
      params.merge!(search_dsl(options[:offset] || 0)) if query
      # Only the facets are of interest here, so no regular hits are requested by default.
      params.reverse_merge!(size: 0)

      facets = CmsBackend.instance.search_objs(workspace, params)['facets']

      # Load all included objs of all facets with a single lookup.
      included_objs_ids = facets.flat_map { |facets_array| get_all_facets_ids(facets_array) }
      obj_collection = Scrivito::BasicObj.find(included_objs_ids)

      result = attributes_list.each_with_object({}) { |attribute, hash| hash[attribute] = [] }
      facets.each_with_index do |facets_array, index|
        result[result.keys[index]] += create_facet_value_objs(facets_array, obj_collection)
      end
      result
    end

    # Translates a public operator name into the backend operator name.
    # Raises a RuntimeError for unknown operators.
    def operator_mapping(operator)
      OPERATOR_MAPPING.fetch(operator.to_sym) do
        raise "Operator '#{operator}' is not valid!"
      end
    end

    # Requests the search results starting at +offset+ and loads the corresponding objs.
    # Returns the pair +[objs, total]+ and memoizes the reported total as the size.
    def fetch_next_batch(offset)
      request_result = CmsBackend.instance.search_objs(workspace, search_dsl(offset))

      obj_ids = request_result['results'].map { |result| result['id'] || result['_id'] }
      objs = workspace.objs.find_including_deleted(obj_ids)

      @size = request_result['total'].to_i

      [objs, @size]
    end

    # Assembles the request parameters for a search starting at +offset+ from the chained options.
    def search_dsl(offset)
      patches = {
        offset: offset,
        query: query,
      }

      if @reverse_order
        patches[:sort_order] = options[:sort_by].present? ? :desc : :asc
      end

      patches[:options] = { include_deleted: true } if @include_deleted

      options.merge(patches)
    end
  end
end