# encoding: UTF-8
module Scrivito
  # Provides an enumerator for iterating over the results of searches for CMS objects to retrieve
  # instances of these objects. This is achieved through the
  # {https://ruby-doc.org/core/Enumerable.html +Enumerable+ mixin}, which provides methods such
  # as +map+, +select+ or +take+.
  #
  # This enumerator is lazy. If, for example, you are looking for {Scrivito::BasicObj Obj}s whose
  # object class is +Publication+, and there are 93 objects in total, then +enum.take(10)+ fetches
  # the first 10 objects only, ignoring the other 83.
  # This implies that repeatedly iterating over this enumerator causes the search results and the
  # objects to be fetched again and again. If you want to get all objects at once, use +enum.to_a+.
  #
  # To start searching, use one of the {Scrivito::BasicObj Obj} methods that return an
  # {Scrivito::ObjSearchEnumerator}. The preferred way is to start with
  # {Scrivito::BasicObj.where Obj.where} or {Scrivito::BasicObj.all Obj.all}.
  #
  # == Currently available fields and their values
  #
  # [+:*+] Searches all fields.
  #        This is only possible with the +contains+, +contains_prefix+, +refers_to+
  #        and +links_to+ operators.
  # [+:id+] Id of an {Scrivito::BasicObj Obj}. This is a +string+ field.
  # [+:_path+] Path of an {Scrivito::BasicObj Obj}. This is a +string+ field.
  # [+:_name+] Name of an {Scrivito::BasicObj Obj}. This is a +string+ field.
  # [+:_obj_class+] Object class of an {Scrivito::BasicObj Obj}. This is a +string+ field.
  # [+:_permalink+] Permalink of an {Scrivito::BasicObj Obj}. This is a +string+ field.
  # [+:_created_at+] The creation date of an {Scrivito::BasicObj Obj}.
  # [+:_last_changed+] Date of last change to an {Scrivito::BasicObj Obj}.
  # [every +_:custom_attribute_+] Custom attribute of an {Scrivito::BasicObj Obj}. Note that
  #                               depending on the attribute type (e.g. an +html+ field), some
  #                               operators cannot be applied.
  #
  # === Meta Data
  #
  # If an {Scrivito::BasicObj Obj} has a +binary+ attribute named +blob+, the meta data of this
  # attribute is searchable. For a full list of the available meta data attributes, see the
  # documentation of the {Scrivito::MetaDataCollection MetaDataCollection}. The meta data
  # attribute name needs to be prefixed with +blob:+ when searching for it. So, for example,
  # when searching for the width, you need to specify the attribute name using +blob:width+.
  # Binary attributes other than +blob+ are not searchable.
  #
  # == Currently available operators
  #
  # === +contains+ and +contains_prefix+
  #
  # These operators are intended for full text search of natural language texts.
  # They are applicable to +string+, +stringlist+, +enum+, +multienum+ and +html+ fields.
  #
  # For +contains+ and +contains_prefix+, the examples are based on the following field value:
  # "Behind every cloud is another cloud."
  #
  # [+:contains+] Searches for one or more whole words. Each word needs to be present.
  #
  #               Example subquery values:
  #
  #               ✔ "behind cloud" (case insensitive)
  #
  #               ✘ "behi clo" (not whole words)
  #
  #               ✘ "behind everything" (second word does not match)
  # [+:contains_prefix+] Searches for a word prefix.
  #
  #                      Example subquery values:
  #
  #                      ✔ "Clou" (case insensitive)
  #
  #                      ✔ "Every" (case insensitive)
  #
  # === +equals+
  #
  # The +equals+ operator is intended for programmatic comparisons of string and date values.
  #
  # The operator has some limits with regard to string length.
  # String values are only guaranteed to be considered if they are at most 1000 characters in
  # length. String values of more than 1000 characters may be ignored by this operator.
  #
  # For +equals+, the examples are based on the following field value:
  # "Some content."
  #
  # [+:equals+] The +field+ value needs to be identical to the +value+ of this subquery.
  #
  #             Applicable to +string+, +stringlist+, +enum+, +multienum+,
  #             +float+, +integer+ and +date+ fields.
  #
  #             Example subquery values:
  #
  #             ✔ "Some content." (exact value)
  #
  #             ✘ "Some" (not exact value)
  #
  # === +starts_with+
  #
  # The +starts_with+ operator is intended for programmatic comparisons of string values.
  #
  # The +starts_with+ operator has a precision limit:
  # Only prefixes of up to 20 characters are guaranteed to be matched.
  # If you supply a prefix of more than 20 characters, the additional characters may be ignored.
  #
  # When combined with the system attribute +_path+, the operator +starts_with+ has some special
  # functionality:
  # There is no precision limit, i.e. a prefix of arbitrary length may be used to match on +_path+.
  # Also, prefix matching on +_path+ automatically matches entire path components,
  # i.e. the prefix matching is delimited by slashes (the character +'/'+).
  #
  # For +starts_with+, the examples are based on the following field value:
  # "Some content."
  #
  # [+:starts_with+] The +field+ value needs to start exactly with the +value+ of this subquery.
  #
  #                  Applicable to +string+, +stringlist+, +enum+ and +multienum+ fields.
  #
  #                  Example subquery values:
  #
  #                  ✔ "Som" (prefix of the value)
  #
  #                  ✘ "som" (incorrect case of prefix)
  #
  #                  ✘ "content" (not prefix of the whole value)
  #
  # === +is_less_than+ and +is_greater_than+
  #
  # These operators are intended for comparing +date+, +integer+, or +float+ values.
  # They only consider attributes of {Scrivito::BasicObj Obj}s and _not_ of
  # {Scrivito::BasicWidget Widget}s.
  # Therefore, {Scrivito::BasicWidget Widget} attributes are not searchable using the
  # +is_less_than+ and +is_greater_than+ operators.
  #
  # For +is_less_than+ and +is_greater_than+, the examples are based on the following date value:
  # +Time.new(2000,01,01,00,00,00)+
  #
  # [+:is_less_than+] Matches if the field value is less than the subquery string value.
  #
  #                   Example subquery values:
  #
  #                   ✔ +Time.new(1999,12,31,23,59,59)+ (is less than)
  #
  #                   ✘ +Time.new(2000,01,01,00,00,00)+ (equal, not less than)
  #
  # [+:is_greater_than+] Matches if the field value is greater than the subquery string value.
  #
  #                      Example subquery values:
  #
  #                      ✔ +Time.new(2000,01,01,00,00,01)+ (is greater than)
  #
  #                      ✘ +Time.new(2000,01,01,00,00,00)+ (equal, not greater than)
  #
  # For +is_less_than+ and +is_greater_than+, the examples are based on the following float value:
  # +23.42+
  #
  # [+:is_less_than+] Matches if the field value is less than the subquery numeric value.
  #
  #                   Example subquery values:
  #
  #                   ✔ +23.41+ (is less than)
  #
  #                   ✔ +5+ (is less than)
  #
  #                   ✘ +23.42+ (equal, not less than)
  #
  # [+:is_greater_than+] Matches if the field value is greater than the subquery numeric value.
  #
  #                      Example subquery values:
  #
  #                      ✔ +23.43+ (is greater than)
  #
  #                      ✔ +42+ (is greater than)
  #
  #                      ✘ +23.42+ (equal, not greater than)
  #
  # === +links_to+
  #
  # The +links_to+ operator searches for CMS objects containing one or more attributes linking to
  # specific CMS objects. So the operator returns the CMS objects in which at least one +html+,
  # +link+, +linklist+, +reference+ or +referencelist+ attribute links to specific CMS objects.
  #
  # The operator can only be applied to all attributes, so the +"*"+ wildcard _must_ be specified
  # for the attributes to search. If you want to search specific +reference+ or +referencelist+
  # attributes, please use the +refers_to+ operator.
  #
  # Using +nil+ instead of an instance of {Scrivito::BasicObj Obj} raises an error.
  #
  # Note that, in contrast to the +refers_to+ operator, the +links_to+ operator searches the
  # attributes directly part of the CMS objects _as_ _well_ _as_ the attributes contained in
  # widgets.
  #
  # [+:links_to+] Searches for CMS objects linking to a specific CMS object.
  #
  #               Example subquery values:
  #
  #               ✔ +my_obj+ (an instance of {Scrivito::BasicObj Obj})
  #
  #               ✔ +[my_obj1, my_obj2]+ (an +Array+ of instances of {Scrivito::BasicObj Obj})
  #
  #               ✘ +nil+ (not an instance of {Scrivito::BasicObj Obj})
  #
  #               ✘ "some_string" (not an instance of {Scrivito::BasicObj Obj})
  #
  # === +refers_to+
  #
  # The +refers_to+ operator searches for CMS objects in which at least one of the specified
  # +reference+ or +referencelist+ attributes refers to specific CMS objects.
  #
  # Using the +"*"+ wildcard for the attributes to search causes all +reference+ and
  # +referencelist+ attributes of the searched CMS objects to be taken into account.
  #
  # Using +nil+ instead of {Scrivito::BasicObj Objs} searches for all CMS objects in which none of
  # the specified attributes refer to a CMS object.
  #
  # Note that, in contrast to the +links_to+ operator, the +refers_to+ operator only searches
  # attributes _directly_ _part_ _of_ _the_ _CMS_ _objects_. Currently, attributes contained in
  # widgets are _not_ searched.
  #
  # [+:refers_to+] Searches for CMS objects in which specific +reference+ or +referencelist+
  #                attributes refer to specific CMS objects.
  #
  #                Example subquery values:
  #
  #                ✔ +my_obj+ (an instance of {Scrivito::BasicObj Obj})
  #
  #                ✔ +[my_obj1, my_obj2]+ (an +Array+ of instances of {Scrivito::BasicObj Obj})
  #
  #                ✔ +nil+
  #
  #                ✘ "some_string" (not an instance of {Scrivito::BasicObj Obj})
  #
  # == Matching +multienum+ and +stringlist+
  #
  # Attributes of type +multienum+ and +stringlist+ contain an array of strings.
  # Each of these strings is searched individually.
  # A search query matches a +multienum+ or +stringlist+, if at least one string in the list
  # matches.
  # Example: A query using the operator +:equals+ and the value +"Eggs"+ matches an Obj containing
  # +["Spam","Eggs"]+ in a +stringlist+ or +multienum+ attribute.
  #
  # == Limits
  #
  # The number of chainable subqueries is limited. The limit depends on the number of values,
  # fields, and boost parameters requested, as well as the number of words in a free text
  # search.
  #
  # == Concurrent changes
  #
  # Please be aware that concurrent changes can change the search result and
  # can yield incomplete results. This is due to the fact that search results are lazily loaded in
  # batches. If you want to modify the result of a search, please call +to_a+ first.
  #
  # @example Concurrent changes
  #   # bad example
  #   books = Book.where(:price, :equals, 10.99)
  #   books.map { |book| book.update(price: 9.99) }
  #
  #   # good example
  #   books = Book.where(:price, :equals, 10.99).to_a
  #   books.map { |book| book.update(price: 9.99) }
  #
  # @api public
  class ObjSearchEnumerator
    # Operators that cannot be used with {#and_not}.
    INVALID_NEGATED_OPERATORS = [:contains, :contains_prefix].freeze

    # All operators accepted by {#and} and {#and_not}.
    OPERATOR_WHITELIST = [
      :contains,
      :contains_prefix,
      :equals,
      :starts_with,
      :is_greater_than,
      :is_less_than,
      :links_to,
      :refers_to,
    ].freeze

    include Enumerable

    attr_reader :workspace
    attr_reader :query

    # @param workspace the workspace to search in.
    # @param [Integer, nil] batch_size requested size of each internal search request;
    #   +nil+ leaves the +size+ parameter out of the search request.
    def initialize(workspace, batch_size = nil)
      @workspace = workspace
      @batch_size = batch_size
      @options = { offset: 0 }
    end

    # @group Chainable methods

    # Adds the given AND subquery to this {Scrivito::ObjSearchEnumerator}.
    #
    # Compares the +field(s)+ with the +value(s)+ using the +operator+ of this subquery.
    # All CMS objects to which this criterion applies remain in the result set.
    #
    # @param [Symbol, String, Array] field Name(s) of the field(s) to be searched.
    #   For arrays, the subquery matches if one or more of these fields meet this criterion.
    # @param [Symbol, String] operator See "Currently available operators" above.
    # @param [String, Integer, Float, Date, Time, Array] value The value(s) to compare with the
    #   field value(s) using the +operator+ of this subquery. For arrays, the subquery matches if
    #   the condition is met for one or more of the array elements.
    # @param [Hash] boost A hash where the keys are field names and their values are boosting
    #   factors. Boosting factors must be in the range from 1 to 10. Boosting can only be applied
    #   to subqueries in which the +contains+ or +contains_prefix+ operator is used.
    # @raise [RuntimeError] if the operator is unknown, or if +boost+ is given together with an
    #   operator other than +contains+ or +contains_prefix+.
    # @return [Scrivito::ObjSearchEnumerator]
    # @api public
    def and(field, operator, value, boost = nil)
      symbolized_operator = ensure_symbol_and_validate_operator(operator)
      subquery = { field: field, operator: symbolized_operator, value: convert_value(value) }

      if boost.present?
        valid_boost_operators = [:contains, :contains_prefix]
        unless valid_boost_operators.include?(symbolized_operator)
          raise "Boost is not allowed with operator '#{operator}'. " +
            "Valid operators are: #{valid_boost_operators.join(', ')}"
        end
        subquery[:boost] = boost
      end

      add_subquery(subquery)
    end

    # Adds the given negated AND subquery to this {Scrivito::ObjSearchEnumerator}.
    #
    # Compares the +field(s)+ with the +value(s)+ using the negated +operator+ of this subquery.
    # All CMS objects to which this criterion applies are removed from the result set.
    #
    # @param [Symbol, String, Array] field Name(s) of the field(s) to be searched.
    #   For arrays, the subquery matches if one or more of these fields meet this criterion.
    # @param [Symbol, String] operator Must be one of: +equals+,
    #   +starts_with+, +is_greater_than+, +is_less_than+, +links_to+, +refers_to+.
    #   (See "Currently available operators" above).
    # @param [String, Date, Time, Integer, Float, Array] value The value(s) to compare with the
    #   field value(s) using the +operator+ of this subquery. For arrays, the subquery matches if
    #   the condition is met for one or more of the array elements.
    # @raise [RuntimeError] if the operator is unknown or cannot be negated.
    # @return [Scrivito::ObjSearchEnumerator]
    # @api public
    def and_not(field, operator, value)
      # Checked before the whitelist so that +contains+/+contains_prefix+ get the more specific
      # "negating" message.
      if INVALID_NEGATED_OPERATORS.include?(operator.to_sym)
        raise "Negating operator '#{operator}' is not valid."
      end

      add_subquery(
        field: field,
        operator: ensure_symbol_and_validate_operator(operator),
        value: convert_value(value),
        negate: true
      )
    end

    # Orders the results by +field_name+.
    #
    # Applicable to the attribute types +string+, +enum+, +integer+, +float+ and +date+.
    #
    # There is a precision limit when sorting string values:
    # Only the first 50 characters of a string are guaranteed to be considered when sorting search
    # results.
    #
    # @overload order(field_name)
    #   @param [Symbol, String] field_name This parameter specifies the field by
    #     which the hits are sorted (e.g. +:_path+).
    #
    # @overload order(field_and_direction)
    #   @param [Hash] field_and_direction The field name and sort direction can be
    #     specified as the key and value of a hash. Valid directions are
    #     +:asc+ and +:desc+. The default is +:asc+.
    #
    # @example Sorting descending
    #   Obj.all.order(_last_changed: :desc)
    #
    # @return [Scrivito::ObjSearchEnumerator]
    # @api public
    def order(field_name)
      field_name, direction =
        if field_name.is_a?(Hash)
          field_name.to_a.first
        else
          [field_name, :asc]
        end

      options[:sort_by] = field_name
      options[:sort_order] = direction.to_sym

      # A batch preloaded by #facet was fetched with the previous sort settings.
      reset_for_changed_query

      self
    end

    # Reverses the order of the results. Requires {#order} to be applied before.
    # @raise [RuntimeError] if no search order has been specified yet.
    # @return [Scrivito::ObjSearchEnumerator]
    # @deprecated This method is deprecated and will be removed in the next major
    #   version. Please specify the direction using {#order}.
    # @api public
    def reverse_order
      Scrivito::Deprecation.warn_method("reverse_order", "order")

      unless options[:sort_by].present?
        raise "A search order has to be specified before reverse_order can be applied."
      end

      options[:sort_order] = options[:sort_order] == :asc ? :desc : :asc

      # A batch preloaded by #facet was fetched in the opposite direction.
      reset_for_changed_query

      self
    end

    # Number of search results to be returned by each of the internal search requests.
    #
    # The default is +10+.
    #
    # Scrivito makes a best effort to return the given number of search results, but may under
    # certain circumstances return larger or smaller batches due to technical reasons.
    #
    # Calling this method also makes a subsequent {#facet} request preload the first batch of
    # search results.
    #
    # @param [Integer] size number of search results to be returned by each of the internal search
    #   requests. Scrivito tries to honor the requested +size+ as much as possible, but there is no
    #   guarantee. At the time of writing, +size+ is capped at +100+, for example.
    #
    # @return [Scrivito::ObjSearchEnumerator]
    # @api public
    def batch_size(size)
      @batch_size = size
      @preload_batch = true

      self
    end

    # Omits the first +amount+ of {Scrivito::BasicObj Obj}s from the results. The default is +0+.
    # Repeated calls add up.
    # @param [Integer] amount
    # @return [Scrivito::ObjSearchEnumerator]
    # @api public
    def offset(amount)
      options[:offset] += amount

      # A batch preloaded by #facet started at the previous offset.
      reset_for_changed_query

      self
    end

    # Adds the +include_deleted+ option to subsequent search and size requests.
    # @return [Scrivito::ObjSearchEnumerator]
    def include_deleted
      @include_deleted = true

      # Both the cached total and a preloaded batch were computed without this option.
      reset_for_changed_query

      self
    end

    # @!endgroup

    # Iterates over the search result, yielding {Scrivito::BasicObj Obj}.
    #
    # When called without a block, no search request is made and an +Enumerator+ is returned.
    #
    # @yield [Obj]
    # @return [void, Enumerator]
    # @api public
    def each
      return enum_for(:each) unless block_given?

      iterator = BatchIterator.new(workspace, search_dsl_params, @preloaded_batch)
      iterator.each do |batch|
        batch.objs.each { |obj| yield obj }
      end

      # Only reached when the iteration ran to completion (not after an early +break+).
      @size = iterator.total
    end

    # The total number of hits.
    #
    # This number is an approximation. Scrivito makes a best effort to deliver
    # the exact number of hits. But due to technical reasons, the returned number may differ
    # from the actual number under certain circumstances.
    #
    # @return [Integer]
    # @api public
    def size
      return @size if @size

      # A request with +size: 0+ fetches the total without loading any objects.
      size_query = { query: query, size: 0 }
      size_query[:options] = { include_deleted: true } if @include_deleted

      @size = CmsBackend.search_objs(workspace, size_query)['total'].to_i
    end

    # Loads a single batch of search results from the backend.
    # @return [Array<Scrivito::BasicObj>] of {Scrivito::BasicObj Obj}.
    #   Usually returns +batch_size+ results if available,
    #   but may occasionally return more or fewer than +batch_size+ results
    #   (due to technical reasons). If you need an exact number of hits, use
    #   methods from +Enumerable+, for example +take+.
    # @api public
    def load_batch
      fetch_batch.objs
    end

    # @api public
    alias_method :length, :size

    # @api public
    alias_method :count, :size

    # Perform a faceted search over up to ten attributes to retrieve structured results for
    # individual values of these attributes.
    #
    # Applicable to attributes of the following types: +string+, +stringlist+, +enum+, +multienum+.
    #
    # Please note that there is a precision limit for faceting: Only the first 50 characters of
    # a string are guaranteed to be considered for faceting. If two string values have the same
    # first 50 characters, they may be grouped into the same facet value.
    #
    # Please note that by default {Scrivito::ObjSearchEnumerator#facet} does not preload the first
    # batch of the search results. In order to reduce the number of search requests, +batch_size+
    # can be explicitly set using the {Scrivito::ObjSearchEnumerator#batch_size} method.
    # This causes Scrivito to preload the first batch of the search results.
    #
    # @overload facet(attribute, options={})
    #   Single-attribute faceting request.
    #   @param [String] attribute the name of an attribute.
    #   @param [Hash] options the options to facet a request with.
    #   @option options [Integer] :limit maximum number of unique values to return.
    #     Defaults to +10+.
    #   @option options [Integer] :include_objs maximum number of Objs to fetch for
    #     each unique value. Defaults to +0+.
    #   @return [Array<Scrivito::ObjFacetValue>] A list of unique values that were found for the
    #     given attribute name. The list is ordered by frequency,
    #     i.e. values occurring more frequently come first.
    # @overload facet(facets)
    #   Multi-attribute faceting request.
    #   The maximum number of attributes that may be specified is +10+.
    #   @param [Hash] facets a hash where the keys are attribute names and the values are options.
    #     The available options are identical to the options for single faceting requests.
    #   @return [Hash] a hash where the keys are identical to the keys given in the
    #     ++facets++ parameter. The values of the hash are lists of {Scrivito::ObjFacetValue}.
    #   @raise [Scrivito::ClientError] If the number of attributes exceeds 10.
    #
    # @example Faceted request: colors of _big_ balloons:
    #   facets = Balloon.where(:size, :equals, "big").facet("color")
    #
    #   # Big balloons come in 3 colors:
    #   facets.count #=> 3
    #
    #   # There are 3 big red balloons:
    #   red_balloons = facets.first
    #   red_balloons.name #=> "red"
    #   red_balloons.count #=> 3
    #
    #   # There are 2 big green balloons:
    #   green_balloons = facets.second
    #   green_balloons.name #=> "green"
    #   green_balloons.count #=> 2
    #
    #   # There is 1 big blue balloon:
    #   blue_balloons = facets.third
    #   blue_balloons.name #=> "blue"
    #   blue_balloons.count #=> 1
    #
    # @example Faceted request with limit: at most 2 colors of big balloons:
    #   facets = Balloon.where(:size, :equals, "big").facet("color", limit: 2)
    #
    #   # Although there are 3 different colors of big balloons,
    #   # only the first 2 colors will be taken into account.
    #   facets.count # => 2
    #
    # @example Faceted request with included Objs:
    #   facets = Balloon.where(:size, :equals, "big").facet("color", include_objs: 2)
    #
    #   facets.each do |facet|
    #     facet.included_objs.each do |obj|
    #       puts "#{obj.size} #{obj.color} #{obj.class}"
    #     end
    #   end
    #
    #   # If there are 2 big red balloons, 2 big green balloons and 1 big blue balloon,
    #   # then this will produce:
    #
    #   "big red Balloon"
    #   "big red Balloon"
    #   "big green Balloon"
    #   "big green Balloon"
    #   "big blue Balloon"
    #
    # @example Multiple faceting request:
    #   facets = Balloon.where(:size, :equals, "big").facet(
    #     color: {limit: 3, include_objs: 5},
    #     motif: {limit: 3, include_objs: 5}
    #   )
    #
    #   color_facet_obj_values = facets[:color]
    #   motif_facet_obj_values = facets[:motif]
    #
    #   color_facet_obj_values.each do |facet|
    #     facet.included_objs.each do |obj|
    #       puts "#{obj.size} #{obj.color} #{obj.class}"
    #     end
    #   end
    #
    #   motif_facet_obj_values.each do |facet|
    #     facet.included_objs.each do |obj|
    #       puts "#{obj.size} #{obj.motif} #{obj.class}"
    #     end
    #   end
    #
    #   # If there are 2 big red balloons, 2 big green balloons and 1 big blue balloon,
    #   # this will produce:
    #
    #   "big red Balloon"
    #   "big red Balloon"
    #   "big green Balloon"
    #   "big green Balloon"
    #   "big blue Balloon"
    #
    #   # If there are 1 big birthday balloon and 1 big wedding balloon,
    #   # this will produce:
    #
    #   "big birthday Balloon"
    #   "big wedding Balloon"
    #
    # @example Faceted +where+ query with +batch_size+:
    #   big_balloons = Balloon.where(:size, :equals, "big")
    #
    #   # Without preloading
    #   balloon_colors = big_balloons.facet("color")
    #   first_ten_balloons = big_balloons.take(10) # This will cause a search request.
    #
    #   # With preloading
    #   big_balloons.batch_size(10) # Make Scrivito preload the first ten balloons.
    #   balloon_colors = big_balloons.facet("color")
    #   first_ten_balloons = big_balloons.take(10) # This will cause _no_ search request.
    #
    # @raise [Scrivito::ClientError] If the maximum number of results has been exceeded.
    #   The number of results is limited to 100 with respect to the facets themselves
    #   and the included Objs.
    #
    # @api public
    def facet(*facet_params)
      search_params = search_dsl_params
      # Without an explicit batch size only the facets are requested, not the hits.
      search_params[:size] = 0 unless @preload_batch

      facet_query = FacetQuery.new(facet_params, search_params, workspace)
      facet_query.execute!
      @preloaded_batch = facet_query.batch if @preload_batch

      facet_query.result
    end

    # Suggests search terms that start with the provided prefix.
    #
    # +#suggest+ works case insensitive, and all results are converted to lower case.
    #
    # Preceding calls to +#and+ or +#and_not+ may limit the amount of documents that are searched
    # for suggestions.
    #
    # @param [String] prefix The prefix string for which suggestions should be determined.
    # @param [Hash] options
    # @option options [Integer] :limit The maximum number of suggestions to return.
    #   Defaults to +5+. Maximum: +100+.
    # @option options [Array<String>] :attributes The list of attributes that serve as input for
    #   suggestions. The special value +["*"]+, which is the default, causes all
    #   +html+, +string+ and +stringlist+ attributes to be taken into account.
    #
    # @return [Array<String>] A list of words the user could search for.
    #
    # @example List suggestions that start with +sc+
    #   Obj.all.suggest('sc')
    #   # ['scrivito', 'science', 'screen']
    #
    # @example Limit suggestions to specific attributes
    #   Obj.all.suggest('ham', { attributes: ['title'] })
    #   # ['hammer']
    #
    # @example Limit the number of suggestions to 2
    #   Obj.all.suggest('sc', { limit: 2 })
    #   # ['scrivito', 'science']
    #
    # @example Limit suggestions for the prefix Kin to objects for which the “de” language flag is set
    #   Obj.where(:language, :equals, 'de').suggest('Kin')
    #   # ['kindergarten', 'kind', 'kinder']
    #
    # @api public
    def suggest(prefix, options = {})
      # NOTE: the +options+ parameter shadows the private +options+ reader inside this method.
      o = options.with_indifferent_access
      params = {
        prefix: prefix,
        fields: o[:attributes] || ['*'],
        limit: o[:limit] || 5,
        from_search: search_dsl_params,
      }

      CmsRestApi.put("workspaces/#{workspace.id}/objs/search/suggest", params)['results']
    end

    # Returns one batch of results and caches its total as {#size}.
    #
    # Without a +continuation+, a batch preloaded by {#facet} is reused if present; otherwise the
    # batch is requested through +QueryExecutor+.
    #
    # @param continuation continuation handle passed on to +QueryExecutor+; +nil+ requests
    #   the first batch.
    # @return the batch (responds to +objs+ and +total+).
    def fetch_batch(continuation=nil)
      batch =
        if @preloaded_batch && !continuation
          @preloaded_batch
        else
          QueryExecutor.new(workspace).call(search_dsl_params, continuation)
        end

      @size = batch.total

      batch
    end

    private

    attr_reader :options

    # Appends +subquery+ to the query without mutating the previous query array,
    # drops cached results and returns +self+ for chaining.
    def add_subquery(subquery)
      reset_for_changed_query
      @query = (query || []) + [subquery]

      self
    end

    # Drops the cached total and any batch preloaded by #facet; both depend on the
    # current query and search options.
    def reset_for_changed_query
      @size = nil
      @preloaded_batch = nil
    end

    # Converts a value, or each element of an array of values, to its backend representation.
    def convert_value(value)
      if value.kind_of?(Array)
        value.map { |v| convert_single_value(v) }
      else
        convert_single_value(value)
      end
    end

    # Objs are converted to their id, times and dates are serialized via +DateConversion+,
    # numbers are passed through, and everything else is converted using +to_s+.
    def convert_single_value(value)
      case value
      when Scrivito::BasicObj
        value.id
      when Time, Date
        DateConversion.serialize_for_backend(value)
      when Integer, Float
        value
      else
        value.to_s
      end
    end

    # Returns +operator+ as a symbol; raises a +RuntimeError+ unless it is whitelisted.
    def ensure_symbol_and_validate_operator(operator)
      symbolized_operator = operator.to_sym

      unless OPERATOR_WHITELIST.include?(symbolized_operator)
        raise "Operator '#{operator}' is not valid!"
      end

      symbolized_operator
    end

    # Builds the parameter hash for a search request from the current options
    # (offset, sorting), the query, the batch size and the +include_deleted+ flag.
    def search_dsl_params
      patches = { query: query }
      patches[:size] = @batch_size if @batch_size
      patches[:options] = { include_deleted: true } if @include_deleted

      options.merge(patches)
    end
  end
end