lib/dynamoid/criteria/chain.rb in dynamoid-3.5.0 vs lib/dynamoid/criteria/chain.rb in dynamoid-3.6.0

- old
+ new

@@ -20,30 +20,78 @@ @query = {} @source = source @consistent_read = false @scan_index_forward = true - # Honor STI and :type field if it presents - type = @source.inheritance_field - if @source.attributes.key?(type) - @query[:"#{type}.in"] = @source.deep_subclasses.map(&:name) << @source.name - end - # we should re-initialize keys detector every time we change query @key_fields_detector = KeyFieldsDetector.new(@query, @source) end - # The workhorse method of the criteria chain. Each key in the passed in hash will become another criteria that the - # ultimate query must match. A key can either be a symbol or a string, and should be an attribute name or - # an attribute name with a range operator. + # Returns a chain which is a result of filtering current chain with the specified conditions. # - # @example A simple criteria - # where(:name => 'Josh') + # It accepts conditions in the form of a hash. # - # @example A more complicated criteria - # where(:name => 'Josh', 'created_at.gt' => DateTime.now - 1.day) + # Post.where(links_count: 2) # + # A key could be either string or symbol. + # + # In order to express conditions other than equality predicates could be used. 
+ # Predicate should be added to an attribute name to form a key +'created_at.gt' => Date.yesterday+ + # + # The following predicates are currently supported: + # - +gt+ - greater than + # - +gte+ - greater or equal + # - +lt+ - less than + # - +lte+ - less or equal + # - +ne+ - not equal + # - +between+ - an attribute value is greater than or equal to the first value and less than or equal to the second value + # - +in+ - check an attribute in a list of values + # - +begins_with+ - check for a prefix in string + # - +contains+ - check substring or value in a set or array + # - +not_contains+ - check for absence of substring or a value in set or array + # - +null+ - attribute doesn't exist in an item + # - +not_null+ - attribute exists in an item + # + # All the predicates match operators supported by DynamoDB's + # {ComparisonOperator}[https://docs.aws.amazon.com/amazondynamodb/latest/APIReference/API_Condition.html#DDB-Type-Condition-ComparisonOperator] + # + # Post.where('size.gt' => 1000) + # Post.where('size.gte' => 1000) + # Post.where('size.lt' => 35000) + # Post.where('size.lte' => 35000) + # Post.where('author.ne' => 'John Doe') + # Post.where('created_at.between' => [Time.now - 3600, Time.now]) + # Post.where('category.in' => ['tech', 'fashion']) + # Post.where('title.begins_with' => 'How long') + # Post.where('tags.contains' => 'Ruby') + # Post.where('tags.not_contains' => 'Ruby on Rails') + # Post.where('legacy_attribute.null' => true) + # Post.where('optional_attribute.not_null' => true) + # + # There are some limitations for a sort key. Only the following predicates + # are supported - +gt+, +gte+, +lt+, +lte+, +between+, +begins_with+. + # + # +where+ without argument will return the current chain.
+ # + # Multiple calls can be chained together and conditions will be merged: + # + # Post.where('size.gt' => 1000).where('title' => 'some title') + # + # It's equivalent to: + # + # Post.where('size.gt' => 1000, 'title' => 'some title') + # + # But only one condition can be specified for a certain attribute. The + # last specified condition will override all the others. Only condition + # 'size.lt' => 200 will be used in following examples: + # + # Post.where('size.gt' => 100, 'size.lt' => 200) + # Post.where('size.gt' => 100).where('size.lt' => 200) + # + # Internally +where+ performs either +Scan+ or +Query+ operation. + # + # @return [Dynamoid::Criteria::Chain] # @since 0.2.0 def where(args) detector = IgnoredConditionsDetector.new(args) if detector.found? Dynamoid.logger.warn(detector.warning_message) @@ -65,40 +113,120 @@ @key_fields_detector = KeyFieldsDetector.new(@query, @source) self end + # Turns on strongly consistent reads. + # + # By default reads are eventually consistent. + # + # Post.where('size.gt' => 1000).consistent + # + # @return [Dynamoid::Criteria::Chain] def consistent @consistent_read = true self end # Returns all the records matching the criteria. # + # Since +where+ and most of the other methods return a +Chain+ + # the only way to get a result as a collection is to call the +all+ + # method. It returns +Enumerator+ which could be used directly or + # transformed into +Array+ + # + # Post.all # => Enumerator + # Post.where(links_count: 2).all # => Enumerator + # Post.where(links_count: 2).all.to_a # => Array + # + # When the result set is too large DynamoDB divides it into separate + # pages. While an enumerator iterates over the result models each page + # is loaded lazily. So even an extra large result set can be loaded and + # processed with considerably small memory footprint and throughput + # consumption. 
+ # + # @return [Enumerator::Lazy] # @since 0.2.0 def all records end + # Returns the actual number of items in a table matching the criteria. + # + # Post.where(links_count: 2).count + # + # Internally it uses either `Scan` or `Query` DynamoDB's operation so it + # costs as much as if all the matching items were read from a table. + # + # The only difference is that items are read by DynamoDB but not actually + # loaded on the client side. DynamoDB returns only count of items after + # filtering. + # + # @return [Integer] def count if @key_fields_detector.key_present? count_via_query else count_via_scan end end - # Returns the last fetched record matched the criteria - # Enumerable doesn't implement `last`, only `first` - # So we have to implement it ourselves + # Returns the first item matching the criteria. # + # Post.where(links_count: 2).first + # + # Applies `record_limit(1)` to ensure only a single record is fetched + # when no non-key conditions are present and `scan_limit(1)` when no + # conditions are present at all. + # + # If used without criteria it just returns the first item of some + # arbitrary order. + # + # Post.first + # + # @return [Model|nil] + def first(*args) + n = args.first || 1 + + return scan_limit(n).to_a.first(*args) if @query.blank? + return super if @key_fields_detector.non_key_present? + + record_limit(n).to_a.first(*args) + end + + # Returns the last item matching the criteria. + # + # Post.where(links_count: 2).last + # + # DynamoDB doesn't support ordering by some arbitrary attribute except a + # sort key. So this method is mostly useful during development and + # testing. + # + # If used without criteria it just returns the last item of some arbitrary order. + # + # Post.last + # + # It isn't efficient from the performance point of view because it reads and + # loads all the filtered items from DynamoDB. + # + # @return [Model|nil] def last all.to_a.last end - # Destroys all the records matching the criteria.
+ # Deletes all the items matching the criteria. # + # Post.where(links_count: 2).delete_all + # + # If called without criteria then it deletes all the items in a table. + # + # Post.delete_all + # + # It loads all the items either with +Scan+ or +Query+ operation and + # deletes them in batch with +BatchWriteItem+ operation. +BatchWriteItem+ + # is limited by request size and items count so it's quite possible the + # deletion will require several +BatchWriteItem+ calls. def delete_all ids = [] ranges = [] if @key_fields_detector.key_present? @@ -115,57 +243,219 @@ Dynamoid.adapter.delete(source.table_name, ids, range_key: ranges.presence) end alias destroy_all delete_all - # The record limit is the limit of evaluated records returned by the - # query or scan. + # Set the record limit. + # + # The record limit is the limit of evaluated items returned by the + # +Query+ or +Scan+. In other words it's how many items should be + # returned in response. + # + # Post.where(links_count: 2).record_limit(1000) # => 1000 models + # Post.record_limit(1000) # => 1000 models + # + # It could be very inefficient in terms of HTTP requests in pathological + # cases. DynamoDB doesn't support out of the box the limits for items + # count after filtering. So it's possible to make a lot of HTTP requests + # to find items matching criteria and skip not matching. It means that + # the cost (read capacity units) is unpredictable. + # + # Because of such issues with performance and cost it's mostly useful in + # development and testing. + # + # When called without criteria it works like +scan_limit+. + # + # @return [Dynamoid::Criteria::Chain] def record_limit(limit) @record_limit = limit self end - # The scan limit which is the limit of records that DynamoDB will - # internally query or scan. This is different from the record limit - # as with filtering DynamoDB may look at N scanned records but return 0 - # records if none pass the filter. + # Set the scan limit. 
+ # + # The scan limit is the limit of records that DynamoDB will internally + # read with +Query+ or +Scan+. It's different from the record limit as + # with filtering DynamoDB may look at N scanned items but return 0 + # items if none passes the filter. So it can return less items than was + # specified with the limit. + # + # Post.where(links_count: 2).scan_limit(1000) # => 850 models + # Post.scan_limit(1000) # => 1000 models + # + # By contrast with +record_limit+ the cost (read capacity units) and + # performance is predictable. + # + # When called without criteria it works like +record_limit+. + # + # @return [Dynamoid::Criteria::Chain] def scan_limit(limit) @scan_limit = limit self end + # Set the batch size. + # + # The batch size is a number of items which will be lazily loaded one by one. + # When the batch size is set then items will be loaded batch by batch of + # the specified size instead of relying on the default paging mechanism + # of DynamoDB. + # + # Post.where(links_count: 2).batch(1000).all.each do |post| + # # process a post + # end + # + # It's useful to limit memory usage or throughput consumption + # + # @return [Dynamoid::Criteria::Chain] def batch(batch_size) @batch_size = batch_size self end + # Set the start item. + # + # When the start item is set the items will be loaded starting right + # after the specified item. + # + # Post.where(links_count: 2).start(post) + # + # It can be used to implement an own pagination mechanism. + # + # Post.where(author_id: author_id).start(last_post).scan_limit(50) + # + # The specified start item will not be returned back in a result set. + # + # Actually it doesn't need all the item attributes to start - an item may + # have only the primary key attributes (partition and sort key if it's + # declared). + # + # Post.where(links_count: 2).start(Post.new(id: id)) + # + # It also supports a +Hash+ argument with the keys attributes - a + # partition key and a sort key (if it's declared). 
+ # + # Post.where(links_count: 2).start(id: id) + # + # @return [Dynamoid::Criteria::Chain] def start(start) @start = start self end + # Reverse the sort order. + # + # By default the sort order is ascending (by the sort key value). Set a + # +false+ value to reverse the order. + # + # Post.where(id: id, 'views_count.gt' => 1000).scan_index_forward(false) + # + # It works only for queries with a partition key condition e.g. +id: + # 'some-id'+ which internally perform a +Query+ operation. + # + # @return [Dynamoid::Criteria::Chain] def scan_index_forward(scan_index_forward) @scan_index_forward = scan_index_forward self end - # Allows you to use the results of a search as an enumerable over the results found. + # Allows using the results of a search as an enumerable over the results + # found. # + # Post.each do |post| + # end + # + # Post.all.each do |post| + # end + # + # Post.where(links_count: 2).each do |post| + # end + # + # It works similarly to the +all+ method so results are loaded lazily. + # # @since 0.2.0 def each(&block) records.each(&block) end + # Iterates over the pages returned by DynamoDB. + # + # DynamoDB has its own paging mechanism and divides a large result set + # into separate pages. The +find_by_pages+ method provides access to + # these native DynamoDB pages. + # + # The pages are loaded lazily. + # + # Post.where('views_count.gt' => 1000).find_by_pages do |posts, options| + # # process posts + # end + # + # It passes as block argument an +Array+ of models and a Hash with options. + # + # Options +Hash+ contains only one option +:last_evaluated_key+. The last + # evaluated key is a Hash with key attributes of the last item processed by + # DynamoDB. It can be used to resume querying using the +start+ method. + # + # posts, options = Post.where('views_count.gt' => 1000).find_by_pages.first + # last_key = options[:last_evaluated_key] + # + # # ...
+ # + # Post.where('views_count.gt' => 1000).start(last_key).find_by_pages do |posts, options| + # end + # + # If it's called without a block then it returns an +Enumerator+. + # + # enum = Post.where('views_count.gt' => 1000).find_by_pages + # + # enum.each do |posts, options| + # # process posts + # end + # + # @return [Enumerator::Lazy] def find_by_pages(&block) pages.each(&block) end + # Select only specified fields. + # + # It takes one or more field names and returns a collection of models with only + # these fields set. + # + # Post.where('views_count.gt' => 1000).project(:title) + # Post.where('views_count.gt' => 1000).project(:title, :created_at) + # Post.project(:id) + # + # It can be used to avoid loading large field values and to decrease a + # memory footprint. + # + # @return [Dynamoid::Criteria::Chain] def project(*fields) @project = fields.map(&:to_sym) self end + # Select only specified fields. + # + # It takes one or more field names and returns an array of either values + # or arrays of values. + # + # Post.pluck(:id) # => ['1', '2'] + # Post.pluck(:id, :title) # => [['1', 'Title #1'], ['2', 'Title #2']] + # + # Post.where('views_count.gt' => 1000).pluck(:title) + # + # There are some differences between +pluck+ and +project+. +pluck+ + # - doesn't instantiate models + # - it isn't chainable and returns +Array+ instead of +Chain+ + # + # It deserializes values if a field type isn't supported by DynamoDB natively. + # + # It can be used to avoid loading large field values and to decrease a + # memory footprint. + # + # @return [Array] def pluck(*args) fields = args.map(&:to_sym) @project = fields if fields.many?
@@ -323,26 +613,25 @@ { consistent_read: consistent_read } end def range_query opts = {} + query = self.query + # Honor STI and :type field if it presents + if @source.attributes.key?(@source.inheritance_field) && + @key_fields_detector.hash_key.to_sym != @source.inheritance_field.to_sym + query.update(sti_condition) + end + # Add hash key opts[:hash_key] = @key_fields_detector.hash_key opts[:hash_value] = type_cast_condition_parameter(@key_fields_detector.hash_key, query[@key_fields_detector.hash_key]) # Add range key if @key_fields_detector.range_key - opts[:range_key] = @key_fields_detector.range_key - if query[@key_fields_detector.range_key].present? - value = type_cast_condition_parameter(@key_fields_detector.range_key, query[@key_fields_detector.range_key]) - opts.update(range_eq: value) - end - - query.keys.select { |k| k.to_s =~ /^#{@key_fields_detector.range_key}\./ }.each do |key| - opts.merge!(range_hash(key)) - end + add_range_key_to_range_query(query, opts) end (query.keys.map(&:to_sym) - [@key_fields_detector.hash_key.to_sym, @key_fields_detector.range_key.try(:to_sym)]) .reject { |k, _| k.to_s =~ /^#{@key_fields_detector.range_key}\./ } .each do |key| @@ -355,10 +644,22 @@ end opts.merge(query_opts).merge(consistent_opts) end + def add_range_key_to_range_query(query, opts) + opts[:range_key] = @key_fields_detector.range_key + if query[@key_fields_detector.range_key].present? + value = type_cast_condition_parameter(@key_fields_detector.range_key, query[@key_fields_detector.range_key]) + opts.update(range_eq: value) + end + + query.keys.select { |k| k.to_s =~ /^#{@key_fields_detector.range_key}\./ }.each do |key| + opts.merge!(range_hash(key)) + end + end + # TODO: casting should be operator aware # e.g. 
for NULL operator value should be boolean # and isn't related to an attribute own type def type_cast_condition_parameter(key, value) return value if %i[array set].include?(source.attributes[key.to_sym][:type]) @@ -416,10 +717,17 @@ opts[:project] = @project opts end def scan_query + query = self.query + + # Honor STI and :type field if it presents + if sti_condition + query.update(sti_condition) + end + {}.tap do |opts| query.keys.map(&:to_sym).each do |key| if key.to_s.include?('.') opts.update(field_hash(key)) else @@ -437,9 +745,21 @@ opts[:batch_size] = @batch_size if @batch_size opts[:exclusive_start_key] = start_key if @start opts[:consistent_read] = true if @consistent_read opts[:project] = @project opts + end + + def sti_condition + condition = {} + type = @source.inheritance_field + + if @source.attributes.key?(type) + class_names = @source.deep_subclasses.map(&:name) << @source.name + condition[:"#{type}.in"] = class_names + end + + condition end end end end