lib/assimilate/catalog.rb in assimilate-0.0.2 vs lib/assimilate/catalog.rb in assimilate-0.0.3

- old
+ new

@@ -1,32 +1,49 @@ require "yaml" +# Catalog configuration: +# db name of mongo database +# catalog name of the catalog collection +# batch name of the batches collection (e.g. "files") +# domain key to use for specifying record domains (will be prefixed with _) +# deletion_marker key to use to marker records that have disappeared from the source file +# # Records in each catalog acquire the following internal attributes: -# _id Unique ID, assigned by mongo -# _[domain] Domain key, specified with :domainkey attribute when initializing catalog -# _dt_first_seen Batch datestamp reference for when this record was first captured -# _dt_last_seen Batch datestamp reference for when this record was most recently affirmed -# _dt_last_update Batch datestamp reference for when this record was most recently altered -# _dt_removed Batch datestamp reference for when this record was removed from input +# _id Unique ID, assigned by mongo +# [domain] Domain key, specified with :domainkey attribute when initializing catalog +# _dt_first_seen Batch datestamp reference for when this record was first captured +# _dt_last_seen Batch datestamp reference for when this record was most recently affirmed +# _dt_last_update Batch datestamp reference for when this record was most recently altered +# [deletion_marker] Batch datestamp reference for when this record was removed from input # # Inbound records must not have attributes named with leading underscores. # # A "domain" here is a namespace of identifiers. class Assimilate::Catalog - attr_reader :catalog, :batches, :domainkey + attr_reader :catalog, :config, :batches def initialize(args) @config = YAML.load(File.open(args[:config])) + check_config - @db = Mongo::Connection.new.db(@config['db']) - @catalog = @db.collection(@config['catalog']) - @batches = @db.collection(@config['batch']) - @domainkey = @config['domain'] - @domainkey = "_#{@domainkey}" unless @domainkey =~ /^_/ # enforce leading underscore on internal attributes + @db = Mongo::Connection.new.db(@config[:db]) + @catalog = @db.collection(@config[:catalog]) + @batches = @db.collection(@config[:batch]) end + def check_config + config.symbolize_keys! + [:db, :catalog, :batch, :domain, :deletion_marker].each do |key| + raise Assimilate::InvalidConfiguration, "missing required parameter: #{key}" unless config[key] + end + [:domain, :deletion_marker].each do |key| + # enforce leading underscore on internal attributes + config[key] = "_#{config[key]}" unless config[key] =~ /^_/ + end + end + def start_batch(args) Assimilate::Batch.new(args.merge(:catalog => self)) end def extend_data(args) @@ -36,8 +53,11 @@ def where(params) @catalog.find(params).first.select {|k,v| k !~ /^_/} end def active_count - @catalog.find("_dt_removed" => nil).count + @catalog.find(config[:deletion_marker] => nil).count end +end + +class Assimilate::InvalidConfiguration < StandardError end