Sha256: ee86d555334df540061938e95a1151050e9540f8986c5e05c9fb4f4621442b67
Contents?: true
Size: 1.84 KB
Versions: 11
Compression:
Stored size: 1.84 KB
Contents
module RocketJob
  # Mongo collection whose documents are retrieved by their `id` field.
  class LookupCollection < Mongo::Collection
    # Rapidly upload individual records in batches.
    #
    # Operates directly on a Mongo Collection to avoid the overhead of creating Mongoid objects
    # for each and every row.
    #
    # Example:
    #   lookup_collection(:my_lookup).upload do |io|
    #     io << {id: 123, data: "first record"}
    #     io << {id: 124, data: "second record"}
    #   end
    #
    #   lookup_collection(:my_lookup).lookup(123)
    #
    # @param batch_size [Integer] number of records buffered before each bulk insert.
    # @yield [BatchUploader] writer that accepts records via `<<`.
    # @return [Integer] the number of records written to the uploader.
    def upload(batch_size: 10_000, &block)
      # The uploader inserts into this collection, so it must be handed over explicitly;
      # BatchUploader.upload requires the collection as its first positional argument.
      BatchUploader.upload(self, batch_size: batch_size, &block)
    end

    # Looks up the value at the specified id.
    # Returns [nil] if no record was found with the supplied id.
    def lookup(id)
      find(id: id).first
    end

    # Internal class for uploading records in batches
    class BatchUploader
      # Number of records accepted via `<<` so far.
      attr_reader :record_count

      # Creates an uploader, yields it to the block, and flushes any buffered
      # records when the block finishes (also when it raises).
      #
      # @param collection [#insert_many] destination for the buffered records.
      # @param args [Hash] keyword arguments forwarded to `new` (`batch_size:`).
      # @yield [BatchUploader] the writer to push records into.
      # @return [Integer] the number of records accepted by the writer.
      def self.upload(collection, **args)
        writer = new(collection, **args)
        yield(writer)
        writer.record_count
      ensure
        # `writer` is nil when `new` itself raised.
        writer&.close
      end

      # @param collection [#insert_many] destination for the buffered records.
      # @param batch_size [Integer] buffer size that triggers a bulk insert.
      def initialize(collection, batch_size:)
        @batch_size   = batch_size
        @record_count = 0
        @documents    = []
        @collection   = collection
      end

      # Buffers a record, inserting the whole buffer once it reaches the batch size.
      #
      # @param record [Hash] document to insert; must have an `:id`, `"id"` or `"_id"` key.
      # @return [self] so calls can be chained.
      # @raise [ArgumentError] when the record is not a Hash or has no id key.
      def <<(record)
        raise(ArgumentError, "Record must be a Hash") unless record.is_a?(Hash)
        unless record.key?(:id) || record.key?("id") || record.key?("_id")
          raise(ArgumentError, "Record must include an :id key")
        end

        @documents << record
        @record_count += 1
        flush if @documents.size >= @batch_size
        self
      end

      # Inserts any records still buffered. Safe to call more than once:
      # the buffer is emptied after a successful insert.
      def close
        flush
      end

      private

      # Bulk-inserts the buffered documents and empties the buffer.
      def flush
        return if @documents.empty?

        @collection.insert_many(@documents)
        @documents.clear
      end
    end
  end
end
Version data entries
11 entries across 11 versions & 1 rubygems