lib/harvestdor-indexer.rb in harvestdor-indexer-0.0.11 vs lib/harvestdor-indexer.rb in harvestdor-indexer-0.0.13

- line removed (present only in harvestdor-indexer-0.0.11)
+ line added (present only in harvestdor-indexer-0.0.13)

@@ -1,8 +1,9 @@ # external gems require 'confstruct' require 'rsolr' +require 'retries' # sul-dlss gems require 'harvestdor' require 'stanford-mods' @@ -19,11 +20,11 @@ attr_accessor :total_time_to_parse,:total_time_to_solr def initialize yml_path, options = {} @success_count=0 # the number of objects successfully indexed @error_count=0 # the number of objects that failed - @max_retries=5 # the number of times to retry an object + @max_retries=10 # the number of times to retry an object @total_time_to_solr=0 @total_time_to_parse=0 @yml_path = yml_path config.configure(YAML.load_file(yml_path)) if yml_path config.configure options @@ -52,17 +53,17 @@ end solr_client.commit total_time=elapsed_time(start_time) total_objects=@success_count+@error_count logger.info("Finished harvest_and_index at #{Time.now}: final Solr commit returned") - logger.info("Total elapsed time for harvest and index: #{(total_time/60.0)} minutes") - logger.info("Avg solr commit time per object (successful): #{@total_time_to_solr/@success_count} seconds") unless (@total_time_to_solr == 0 || @success_count == 0) - logger.info("Avg solr commit time per object (all): #{@total_time_to_solr/total_objects} seconds") unless (@total_time_to_solr == 0 || @error_count == 0 || total_objects == 0) - logger.info("Avg parse time per object (successful): #{@total_time_to_parse/@success_count} seconds") unless (@total_time_to_parse == 0 || @success_count == 0) - logger.info("Avg parse time per object (all): #{@total_time_to_parse/total_objects} seconds") unless (@total_time_to_parse == 0 || @error_count == 0 || total_objects == 0) - logger.info("Avg complete index time per object (successful): #{total_time/@success_count} seconds") unless (@success_count == 0) - logger.info("Avg complete index time per object (all): #{total_time/total_objects} seconds") unless (@error_count == 0 || total_object == 0) + logger.info("Total elapsed time for harvest and index: #{(total_time/60.0).round(2)} minutes") + 
logger.info("Avg solr commit time per object (successful): #{(@total_time_to_solr/@success_count).round(2)} seconds") unless (@total_time_to_solr == 0 || @success_count == 0) + logger.info("Avg solr commit time per object (all): #{(@total_time_to_solr/total_objects).round(2)} seconds") unless (@total_time_to_solr == 0 || @error_count == 0 || total_objects == 0) + logger.info("Avg parse time per object (successful): #{(@total_time_to_parse/@success_count).round(2)} seconds") unless (@total_time_to_parse == 0 || @success_count == 0) + logger.info("Avg parse time per object (all): #{(@total_time_to_parse/total_objects).round(2)} seconds") unless (@total_time_to_parse == 0 || @error_count == 0 || total_objects == 0) + logger.info("Avg complete index time per object (successful): #{(total_time/@success_count).round(2)} seconds") unless (@success_count == 0) + logger.info("Avg complete index time per object (all): #{(total_time/total_objects).round(2)} seconds") unless (@error_count == 0 || total_objects == 0) logger.info("Successful count: #{@success_count}") logger.info("Error count: #{@error_count}") logger.info("Total records processed: #{total_objects}") end @@ -76,30 +77,25 @@ logger.info("Completed OAI harves of druids at #{Time.now}. Found #{@druids.size} druids. Total elapsed time for OAI harvest = #{elapsed_time(start_time,:minutes)} minutes") end return @druids end - #add the document to solr, retry if an error occurs - def solr_add(doc, id, do_retry=true) - #if do_retry is false, skip retrying - tries=do_retry ? 0 : 999 - max_tries=@max_retries ? @max_retries : 5 #if @max_retries isn't set, use 5 - while tries < max_tries - begin - tries+=1 - solr_client.add(doc) - #return if successful - return - rescue => e - if tries<max_tries - logger.warn "#{id}: #{e.message}, retrying" - else - @error_count+=1 - logger.error "Failed saving #{id}: #{e.message}" - logger.error e.backtrace - return - end + # Add the document to solr, retry if an error occurs. 
+ # See https://github.com/ooyala/retries for docs on with_retries. + # @param [Hash] doc a Hash representation of the solr document + # @param [String] id the id of the document being sent, for logging + def solr_add(doc, id) + max_tries=@max_retries ? @max_retries : 10 #if @max_retries isn't set, use 10 + + handler = Proc.new do |exception, attempt_number, total_delay| + logger.debug "#{exception.class} on attempt #{attempt_number} for #{id}" + # logger.debug exception.backtrace end + + with_retries(:max_tries => max_tries, :handler => handler, :base_sleep_seconds => 1, :max_sleep_seconds => 5) do |attempt| + logger.debug "Attempt #{attempt} for #{id}" + solr_client.add(doc) + logger.info "Successfully indexed #{id} on attempt #{attempt}" end end # create Solr doc for the druid and add it to Solr, unless it is on the blacklist. # NOTE: don't forget to send commit to Solr, either once at end (already in harvest_and_index), or for each add, or ...