lib/harvestdor/indexer.rb in harvestdor-indexer-2.0.0 vs lib/harvestdor/indexer.rb in harvestdor-indexer-2.1.0

- old
+ new

@@ -1,6 +1,5 @@ - # external gems require 'confstruct' require 'rsolr' require 'retries' require 'parallel' @@ -35,23 +34,23 @@ end def config @config ||= Confstruct::Configuration.new end - + def logger @logger ||= begin if config.harvestdor Dir.mkdir(config.harvestdor.log_dir) unless File.directory?(config.harvestdor.log_dir) Logger.new(File.join(config.harvestdor.log_dir, config.harvestdor.log_name), 'daily') else Logger.new STDERR end end end - - # per this Indexer's config options + + # per this Indexer's config options # harvest the druids via DorFetcher # create a Solr profiling document for each druid # write the result to the Solr index def harvest_and_index each_options = {in_threads: 4} benchmark "Harvest and Indexing" do @@ -77,22 +76,22 @@ metrics.tally on_error: method(:resource_error) do yield resource end end end - + logger.info("Successful count: #{metrics.success_count}") logger.info("Error count: #{metrics.error_count}") logger.info("Total records processed: #{metrics.total}") end def resource_error e if e.instance_of? Parallel::Break or e.instance_of? Parallel::Kill raise e end end - + # return Array of druids contained in the DorFetcher pulling indicated by DorFetcher params # @return [Array<String>] or enumeration over it, if block is given. (strings are druids, e.g. ab123cd1234) def druids @druids ||= whitelist end @@ -110,18 +109,18 @@ # (e.g. things that are the same across all documents in the harvest) solr.add doc_hash # TODO: provide call to code to update DOR object's workflow datastream?? end end - + # @return an Array of druids ('oo000oo0000') that should be processed def whitelist @whitelist ||= config.whitelist if config.whitelist.is_a? Array @whitelist ||= load_whitelist(config.whitelist) if config.whitelist @whitelist ||= [] end - + def harvestdor_client @harvestdor_client ||= Harvestdor::Client.new(config.harvestdor) end def dor_fetcher_client @@ -131,22 +130,22 @@ def solr @solr ||= Harvestdor::Indexer::Solr.new self, config.solr.to_hash end protected #--------------------------------------------------------------------- - + # populate @whitelist as an Array of druids ('oo000oo0000') that WILL be processed # by reading the File at the indicated path # @param [String] path - path of file containing a list of druids - def load_whitelist path + def load_whitelist(path) @whitelist = load_id_list path end - + # return an Array of druids ('oo000oo0000') # populated by reading the File at the indicated path # @param [String] path - path of file containing a list of druids # @return [Array<String>] an Array of druids - def load_id_list path + def load_id_list(path) list = File.open(path).each_line .map { |line| line.strip } .reject { |line| line.strip.start_with?('#') } .reject { |line| line.empty? } rescue \ No newline at end of file