lib/harvestdor/indexer.rb in harvestdor-indexer-2.0.0 vs lib/harvestdor/indexer.rb in harvestdor-indexer-2.1.0
- old
+ new
@@ -1,6 +1,5 @@
-
# stdlib (FileUtils.mkdir_p is used by #logger)
require 'fileutils'
# external gems
require 'confstruct'
require 'rsolr'
require 'retries'
require 'parallel'
@@ -35,23 +34,23 @@
end
# Configuration object for this Indexer, created on first use.
# When nothing has been stored in @config yet, an empty
# Confstruct::Configuration is built and remembered; every later call
# returns that same object.
# @return the memoized configuration (a Confstruct::Configuration when it is created here)
def config
  return @config if @config

  @config = Confstruct::Configuration.new
end
-
+
# Lazily builds and memoizes the Logger used by this Indexer.
#
# When config.harvestdor is set, messages go to a daily-rotated file named
# config.harvestdor.log_name inside config.harvestdor.log_dir; the directory
# (including any missing parent directories) is created first.
# Otherwise messages go to STDERR.
# @return [Logger]
def logger
  @logger ||= begin
    if config.harvestdor
      log_dir = config.harvestdor.log_dir
      # mkdir_p creates missing parents and is a no-op for an existing
      # directory, so there is no separate exists-check that could race
      # with another thread creating the same directory.
      FileUtils.mkdir_p(log_dir)
      Logger.new(File.join(log_dir, config.harvestdor.log_name), 'daily')
    else
      Logger.new STDERR
    end
  end
end
-
- # per this Indexer's config options
+
+ # per this Indexer's config options
# harvest the druids via DorFetcher
# create a Solr profiling document for each druid
# write the result to the Solr index
def harvest_and_index each_options = {in_threads: 4}
benchmark "Harvest and Indexing" do
@@ -77,22 +76,22 @@
metrics.tally on_error: method(:resource_error) do
yield resource
end
end
end
-
+
logger.info("Successful count: #{metrics.success_count}")
logger.info("Error count: #{metrics.error_count}")
logger.info("Total records processed: #{metrics.total}")
end
# Error handler passed as the on_error callback to metrics.tally.
# Parallel's own control-flow exceptions (exactly Parallel::Break or
# Parallel::Kill, not subclasses) are re-raised; for any other error
# this method does nothing and returns nil.
# @param e [Exception] the error raised while processing a resource
def resource_error(e)
  return unless e.instance_of?(Parallel::Break) || e.instance_of?(Parallel::Kill)

  raise e
end
-
+
# Druids this Indexer will process.
# The list is taken from #whitelist the first time it is needed and then
# memoized in @druids.
# @return [Array<String>] druids (e.g. ab123cd1234), as provided by #whitelist
def druids
  return @druids if @druids

  @druids = whitelist
end
@@ -110,18 +109,18 @@
# (e.g. things that are the same across all documents in the harvest)
solr.add doc_hash
# TODO: provide call to code to update DOR object's workflow datastream??
end
end
-
+
# Druids ('oo000oo0000') that should be processed, resolved once and
# memoized in @whitelist.
#
# config.whitelist decides where the list comes from:
# * an Array is used as-is
# * any other truthy value is handed to load_whitelist
# * nil/false (or a load that yields nothing) gives an empty list
# @return [Array] the druids to process
def whitelist
  configured = config.whitelist
  return @whitelist if @whitelist

  resolved =
    if configured.is_a?(Array)
      configured
    elsif configured
      load_whitelist(configured)
    end
  @whitelist = resolved || []
end
-
+
# Harvestdor::Client for this Indexer, built from config.harvestdor on the
# first call and reused on every call after that.
# @return [Harvestdor::Client]
def harvestdor_client
  return @harvestdor_client if @harvestdor_client

  client_options = config.harvestdor
  @harvestdor_client = Harvestdor::Client.new(client_options)
end
def dor_fetcher_client
@@ -131,22 +130,22 @@
# Solr wrapper for this Indexer, created on first use and then reused.
# It is constructed with this Indexer itself and config.solr converted to
# a Hash via #to_hash.
# @return [Harvestdor::Indexer::Solr]
def solr
  return @solr if @solr

  solr_options = config.solr.to_hash
  @solr = Harvestdor::Indexer::Solr.new(self, solr_options)
end
protected #---------------------------------------------------------------------
-
+
# populate @whitelist as an Array of druids ('oo000oo0000') that WILL be processed
# by reading the File at the indicated path
# @param [String] path - path of file containing a list of druids
- def load_whitelist path
+ def load_whitelist(path)
# Stores whatever load_id_list returns in @whitelist; the assignment's value
# is also this method's return value.
@whitelist = load_id_list path
end
-
+
# return an Array of druids ('oo000oo0000')
# populated by reading the File at the indicated path
# @param [String] path - path of file containing a list of druids
# @return [Array<String>] an Array of druids
- def load_id_list path
+ def load_id_list(path)
list = File.open(path).each_line
.map { |line| line.strip }
.reject { |line| line.strip.start_with?('#') }
.reject { |line| line.empty? }
rescue
\ No newline at end of file