lib/assimilate/catalog.rb in assimilate-0.0.2 vs lib/assimilate/catalog.rb in assimilate-0.0.3
- old
+ new
@@ -1,32 +1,49 @@
require "yaml"
+# Catalog configuration:
+# db name of mongo database
+# catalog name of the catalog collection
+# batch name of the batches collection (e.g. "files")
+# domain key to use for specifying record domains (will be prefixed with _)
+# deletion_marker key used to mark records that have disappeared from the source file (will be prefixed with _)
+#
# Records in each catalog acquire the following internal attributes:
-# _id Unique ID, assigned by mongo
-# _[domain] Domain key, specified with :domainkey attribute when initializing catalog
-# _dt_first_seen Batch datestamp reference for when this record was first captured
-# _dt_last_seen Batch datestamp reference for when this record was most recently affirmed
-# _dt_last_update Batch datestamp reference for when this record was most recently altered
-# _dt_removed Batch datestamp reference for when this record was removed from input
+# _id Unique ID, assigned by mongo
+# [domain] Domain key, named by the "domain" configuration parameter (leading underscore enforced)
+# _dt_first_seen Batch datestamp reference for when this record was first captured
+# _dt_last_seen Batch datestamp reference for when this record was most recently affirmed
+# _dt_last_update Batch datestamp reference for when this record was most recently altered
+# [deletion_marker] Batch datestamp reference for when this record was removed from input
#
# Inbound records must not have attributes named with leading underscores.
#
# A "domain" here is a namespace of identifiers.
class Assimilate::Catalog
- attr_reader :catalog, :batches, :domainkey
+ attr_reader :catalog, :config, :batches
def initialize(args)
@config = YAML.load(File.open(args[:config]))
+ check_config
- @db = Mongo::Connection.new.db(@config['db'])
- @catalog = @db.collection(@config['catalog'])
- @batches = @db.collection(@config['batch'])
- @domainkey = @config['domain']
- @domainkey = "_#{@domainkey}" unless @domainkey =~ /^_/ # enforce leading underscore on internal attributes
+ @db = Mongo::Connection.new.db(@config[:db])
+ @catalog = @db.collection(@config[:catalog])
+ @batches = @db.collection(@config[:batch])
end
+ def check_config
+ config.symbolize_keys!
+ [:db, :catalog, :batch, :domain, :deletion_marker].each do |key|
+ raise Assimilate::InvalidConfiguration, "missing required parameter: #{key}" unless config[key]
+ end
+ [:domain, :deletion_marker].each do |key|
+ # enforce leading underscore on internal attributes
+ config[key] = "_#{config[key]}" unless config[key] =~ /^_/
+ end
+ end
+
def start_batch(args)
Assimilate::Batch.new(args.merge(:catalog => self))
end
def extend_data(args)
@@ -36,8 +53,11 @@
def where(params)
@catalog.find(params).first.select {|k,v| k !~ /^_/}
end
def active_count
- @catalog.find("_dt_removed" => nil).count
+ @catalog.find(config[:deletion_marker] => nil).count
end
+end
+
+class Assimilate::InvalidConfiguration < StandardError
end