lib/tasks/index.rake in cul_hydra-1.0.6 vs lib/tasks/index.rake in cul_hydra-1.0.7
- old
+ new
@@ -1,5 +1,7 @@
+require 'thread/pool'
+
namespace :cul_hydra do
namespace :index do
task :recursive => :environment do
@@ -49,23 +51,60 @@
project_pid = ENV['PID']
else
puts 'Please specify a project PID (e.g. PID=cul:123)'
next
end
+
+ if ENV['THREADS'].present?
+ thread_pool_size = ENV['THREADS'].to_i
+ puts "Number of threads: #{thread_pool_size}"
+ else
+ thread_pool_size = 1
+ puts "Number of threads: #{thread_pool_size}"
+ end
skip_generic_resources = (ENV['skip_generic_resources'] == 'true')
+
+ ### Stop excessive ActiveFedora logging ###
+ # initialize the fedora connection if necessary
+ connection = (ActiveFedora::Base.fedora_connection[0] ||= ActiveFedora::RubydoraConnection.new(ActiveFedora.config.credentials)).connection
+ # the logger accessor is private
+ (connection.api.send :logger).level = Logger::INFO
start_time = Time.now
pids = Cul::Hydra::RisearchMembers.get_project_constituent_pids(project_pid, true)
total = pids.length
puts "Found #{total} project members."
counter = 0
+
+ # We run into autoloading issues when running in a multithreaded context,
+ # so we'll have the application eager load all classes now.
+ Rails.application.eager_load!
+ # Hack: Force load of classes that are giving autoload errors by referencing them below
+ BagAggregator.to_s
+ ContentAggregator.to_s
+ GenericResource.to_s
+
+ ###########################################
+ pool = Thread.pool(thread_pool_size)
+ mutex = Mutex.new
pids.each do |pid|
- Cul::Hydra::Indexer.index_pid(pid, skip_generic_resources, false)
- counter += 1
- puts "Indexed #{counter} of #{total} | #{Time.now - start_time} seconds"
+ pool.process {
+
+ Cul::Hydra::Indexer.index_pid(pid, skip_generic_resources, false)
+
+ mutex.synchronize do
+ counter += 1
+ puts "Indexed #{counter} of #{total} | #{Time.now - start_time} seconds"
+ end
+ }
end
+
+ pool.shutdown
+ ###########################################
+
+ puts 'Done'
end
task :by_publish_target_pid => :environment do
\ No newline at end of file