Sha256: ee86d555334df540061938e95a1151050e9540f8986c5e05c9fb4f4621442b67

Contents?: true

Size: 1.84 KB

Versions: 11

Compression:

Stored size: 1.84 KB

Contents

module RocketJob
  class LookupCollection < Mongo::Collection
    # Rapidly upload individual records in batches.
    #
    # Operates directly on a Mongo Collection to avoid the overhead of creating Mongoid objects
    # for each and every row.
    #
    # Records appended inside the block are buffered and written to this collection
    # `batch_size` at a time; any remainder is written once the block finishes.
    #
    # Parameters:
    #   batch_size: Number of records to buffer before each bulk insert. Default: 10,000
    #
    # Returns the number of records that were appended inside the block.
    #
    # Example:
    #   lookup_collection(:my_lookup).upload do |io|
    #     io << {id: 123, data: "first record"}
    #     io << {id: 124, data: "second record"}
    #   end
    #
    #   lookup_collection(:my_lookup).lookup(123)
    def upload(batch_size: 10_000, &block)
      # The uploader takes the target collection as its first positional argument;
      # omitting it raises ArgumentError before a single record is written.
      BatchUploader.upload(self, batch_size: batch_size, &block)
    end

    # Fetches the first document in this collection whose `id` field equals
    # the supplied id.
    #
    # Returns the matching document, or nil when nothing matches.
    def lookup(id)
      matches = find(id: id)
      matches.first
    end

    # Internal class for uploading records in batches.
    #
    # Buffers records in memory and hands them to the collection's `insert_many`
    # each time `batch_size` records have accumulated.
    class BatchUploader
      # Total number of records appended so far, including any still buffered.
      attr_reader :record_count

      # Creates an uploader for `collection`, yields it to the block, and writes
      # whatever is still buffered once the block finishes, even when it raised.
      #
      # Returns the number of records appended inside the block.
      def self.upload(collection, **args)
        writer = new(collection, **args)
        yield(writer)
        writer.record_count
      ensure
        # writer is nil when the constructor itself raised.
        writer&.close
      end

      # Parameters:
      #   collection: Object that receives the buffered records via `insert_many`.
      #   batch_size: Number of records to buffer before each insert.
      def initialize(collection, batch_size:)
        @batch_size   = batch_size
        @record_count = 0
        @batch_count  = 0
        @documents    = []
        @collection   = collection
      end

      # Appends a record to the current batch, writing the batch out once it is full.
      #
      # Raises ArgumentError when the record is not a Hash, or has none of the
      # keys :id, "id" or "_id".
      #
      # Returns self so that calls can be chained: io << a << b
      def <<(record)
        raise(ArgumentError, "Record must be a Hash") unless record.is_a?(Hash)

        unless record.key?(:id) || record.key?("id") || record.key?("_id")
          raise(ArgumentError, "Record must include an :id key")
        end

        @documents << record
        @record_count += 1
        @batch_count  += 1
        flush if @batch_count >= @batch_size

        self
      end

      # Writes any records still buffered.
      #
      # Safe to call more than once: the buffer is emptied after a successful
      # write, so a repeated close does not insert the same records again.
      def close
        flush
      end

      private

      # Inserts the buffered records, if any, and starts a new empty batch.
      def flush
        return if @documents.empty?

        @collection.insert_many(@documents)
        @documents.clear
        @batch_count = 0
      end
    end
  end
end

Version data entries

11 entries across 11 versions & 1 rubygem

Version Path
rocketjob-6.3.1 lib/rocket_job/lookup_collection.rb
rocketjob-6.3.0 lib/rocket_job/lookup_collection.rb
rocketjob-6.2.0 lib/rocket_job/lookup_collection.rb
rocketjob-6.1.1 lib/rocket_job/lookup_collection.rb
rocketjob-6.1.0 lib/rocket_job/lookup_collection.rb
rocketjob-6.0.3 lib/rocket_job/lookup_collection.rb
rocketjob-6.0.2 lib/rocket_job/lookup_collection.rb
rocketjob-6.0.1 lib/rocket_job/lookup_collection.rb
rocketjob-6.0.0 lib/rocket_job/lookup_collection.rb
rocketjob-6.0.0.rc3 lib/rocket_job/lookup_collection.rb
rocketjob-6.0.0.rc2 lib/rocket_job/lookup_collection.rb