lib/assembly-utils/utils.rb in assembly-utils-1.4.5 vs lib/assembly-utils/utils.rb in assembly-utils-1.4.6
- old
+ new
@@ -2,23 +2,20 @@
require 'csv'
require 'csv-mapper'
require 'druid-tools'
begin
-require 'net/ssh/kerberos'
+ require 'net/ssh/kerberos'
rescue LoadError
end
module Assembly
-
-
# The Utils class contains methods to help with accessioning and assembly
class Utils
-
WFS = Dor::WorkflowService
REPO = 'dor'
-
+
# Get the staging directory tree given a druid, and optionally prepend a basepath.
# Deprecated and should not be needed anymore.
#
# @param [String] pid druid pid (e.g. 'aa000aa0001')
# @param [String] base_path optional base path to prepend to druid path
@@ -26,108 +23,91 @@
# @return [string] path to material that is being staged, with optional prepended base path
#
# Example:
# puts Assembly::Utils.get_staging_path('aa000aa0001','tmp')
# > "tmp/aa/000/aa/0001"
- def self.get_staging_path(pid,base_path=nil)
- d=DruidTools::Druid.new(pid,base_path)
- path=File.dirname(d.path)
- return path
+ def self.get_staging_path(pid, base_path = nil)
+ d = DruidTools::Druid.new(pid, base_path)
+ File.dirname(d.path)
end
# Insert the specified workflow into the specified object.
#
# @param [String] pid druid pid (e.g. 'aa000aa0001')
# @param [String] workflow name (e.g. 'accessionWF')
- # @param [String] repository name (e.g. 'dor') -- optional, defaults to dor
+ # @param [String] repository name (e.g. 'dor') -- optional, defaults to dor
#
# @return [boolean] indicates success of web service call
#
# Example:
# puts Assembly::Utils.insert_workflow('druid:aa000aa0001','accessionWF')
# > true
- def self.insert_workflow(pid,workflow,repo='dor')
- url = "#{Dor::Config.dor.service_root}/objects/#{pid}/apo_workflows/#{workflow}"
+ def self.insert_workflow(pid, workflow, repo = 'dor')
+ url = "#{Dor::Config.dor.service_root}/objects/#{pid}/apo_workflows/#{workflow}"
result = RestClient.post url, {}
- return ([200,201,202,204].include?(result.code) && result)
+ [200, 201, 202, 204].include?(result.code) && result
end
-
+
# Claim a specific druid as already used to be sure it won't get used again.
# Not needed for normal purposes, only if you manually register something in Fedora Admin outside of DOR services gem.
#
# @param [String] pid druid pid (e.g. 'aa000aa0001')
- #
# @return [boolean] indicates success of web service call
#
# Example:
# puts Assembly::Utils.claim_druid('aa000aa0001')
# > true
def self.claim_druid(pid)
- sc = Dor::Config.suri
- url = "#{sc.url}/suri2/namespaces/#{sc.id_namespace}"
- rcr = RestClient::Resource.new(url, :user => sc.user, :password => sc.pass)
- resp = rcr["identifiers/#{pid}"].put('')
- return resp.code == "204"
+ sc = Dor::Config.suri
+ url = "#{sc.url}/suri2/namespaces/#{sc.id_namespace}"
+ rcr = RestClient::Resource.new(url, :user => sc.user, :password => sc.pass)
+ resp = rcr["identifiers/#{pid}"].put('')
+ resp.code == '204'
end
-
- # Force a full re-index of the supplied druid in solr and fedora.
- #
- # @param [String] druid druid (e.g. 'druid:aa000aa0001')
- #
- # Example:
- # puts Assembly::Utils.reindex('druid:aa000aa0001')
- def self.reindex(druid)
- obj = Dor.load_instance druid
- solr_doc = obj.to_solr
- Dor::SearchService.solr.add(solr_doc, :add_attributes => {:commitWithin => 1000}) unless obj.nil?
- Dor.find(pid).update_index
- end
-
+
# Export one or more objects given a single or array of pids, with output to the specified directory as FOXML files
#
# @param [Array] pids - an array of pids to export (can also pass a single pid as a string)
# @param [String] output_dir - the full path to output the foxml files
#
# Example:
# Assembly::Utils.export_objects(['druid:aa000aa0001','druid:bb000bb0001'],'/tmp')
- def self.export_objects(pids,output_dir)
- pids=[pids] if pids.class==String
- pids.each {|pid| ActiveFedora::FixtureExporter.export_to_path(pid, output_dir)}
+ def self.export_objects(pids, output_dir)
+ pids = [pids] if pids.class == String
+ pids.each {|pid| ActiveFedora::FixtureExporter.export_to_path(pid, output_dir)}
end
# Import all of the FOXML files in the specified directory into Fedora
#
# @param [String] source_dir - the full path to import the foxml files
#
# Example:
# Assembly::Utils.import_objects('/tmp')
def self.import_objects(source_dir)
Dir.chdir(source_dir)
- files=Dir.glob('*.foxml.xml')
+ files = Dir.glob('*.foxml.xml')
files.each do |file|
- pid = ActiveFedora::FixtureLoader.import_to_fedora(File.join(source_dir,file))
+ pid = ActiveFedora::FixtureLoader.import_to_fedora(File.join(source_dir, file))
ActiveFedora::FixtureLoader.index(pid)
end
end
-
- # Get a list of druids that match the given array of source IDs.
+
+ # Get a list of druids that match the given array of source IDs.
# This method only works when this gem is used in a project that is configured to connect to DOR
#
# @param [String] source_ids array of source ids to lookup
- #
# @return [array] druids
# Example:
- #
# puts Assembly::Utils.get_druids_by_sourceid(['revs-01','revs-02'])
# > ['druid:aa000aa0001','druid:aa000aa0002']
def self.get_druids_by_sourceid(source_ids)
- druids=[]
- source_ids.each {|sid| druids << Dor::SearchService.query_by_id(sid)}
+ druids = []
+ source_ids.each {|sid| druids << Dor::SearchService.query_by_id(sid)}
druids.flatten
end
-
- # Show the workflow status of specific steps in assembly and/or accession workflows for the provided druids.
+
+ # Show the workflow status of specific steps in assembly and/or accession workflows for the provided druids.
# This method only works when this gem is used in a project that is configured to connect to DOR
#
# @param [Hash] params parameters specified as a hash, using symbols for options:
# * :druids => array of druids to get workflow status for
# * :workflows => an optional array of workflow names as symbols, options are :assembly and :accession; defaults to :assembly
@@ -135,45 +115,44 @@
#
# @return [string] comma delimited output or CSV file
#
# Example:
# Assembly::Utils.workflow_status(:druids=>['druid:aa000aa0001','druid:aa000aa0002'],:workflows=>[:assembly,:accession],:filename=>'output.csv')
- def self.workflow_status(params={})
+ def self.workflow_status(params = {})
- druids=params[:druids] || []
- workflows=params[:workflows] || [:assembly]
- filename=params[:filename] || ''
-
+ druids = params[:druids] || []
+ workflows = params[:workflows] || [:assembly]
+ filename = params[:filename] || ''
accession_steps = %w(content-metadata descriptive-metadata rights-metadata remediate-object shelve publish)
- assembly_steps = %w(jp2-create checksum-compute exif-collect accessioning-initiate)
+ assembly_steps = %w(jp2-create checksum-compute exif-collect accessioning-initiate)
- puts "Generating report"
+ puts 'Generating report'
- csv = CSV.open(filename, "w") if filename != ''
-
- header=["druid"]
- header << assembly_steps if workflows.include?(:assembly)
+ csv = CSV.open(filename, 'w') if filename != ''
+
+ header = ['druid']
+ header << assembly_steps if workflows.include?(:assembly)
header << accession_steps if workflows.include?(:accession)
csv << header.flatten if filename != ''
- puts header.join(',')
-
+ puts header.join(',')
+
druids.each do |druid|
- output=[druid]
- assembly_steps.each {|step| output << self.get_workflow_status(druid,'assemblyWF',step)} if workflows.include?(:assembly)
- accession_steps.each {|step| output << self.get_workflow_status(druid,'accessionWF',step)} if workflows.include?(:accession)
+ output = [druid]
+ assembly_steps.each {|step| output << get_workflow_status(druid, 'assemblyWF', step )} if workflows.include?(:assembly)
+ accession_steps.each {|step| output << get_workflow_status(druid, 'accessionWF', step)} if workflows.include?(:accession)
csv << output if filename != ''
puts output.join(',')
end
-
+
if filename != ''
- csv.close
+ csv.close
puts "Report generated in #{filename}"
end
end
- # Show the workflow status of a specific step in a specific workflow for the provided druid.
+ # Show the workflow status of a specific step in a specific workflow for the provided druid.
# This method only works when this gem is used in a project that is configured to connect to DOR
#
# @param [string] druid a druid string
# @param [string] workflow name of workflow
# @param [string] step name of step
@@ -181,158 +160,152 @@
# @return [string] workflow step status, returns nil if no workflow found
#
# Example:
# puts Assembly::Utils.get_workflow_status('druid:aa000aa0001','assemblyWF','jp2-create')
# > "completed"
- def self.get_workflow_status(druid,workflow,step)
- Dor::WorkflowService.get_workflow_status('dor', druid, workflow, step)
+ def self.get_workflow_status(druid, workflow, step)
+ Dor::WorkflowService.get_workflow_status('dor', druid, workflow, step)
end
-
- # Cleanup a list of objects and associated files given a list of druids. WARNING: VERY DESTRUCTIVE.
- # This method only works when this gem is used in a project that is configured to connect to DOR
- #
- # @param [Hash] params parameters specified as a hash, using symbols for options:
- # * :druids => array of druids to cleanup
- # * :steps => an array of steps, specified as symbols, indicating steps to be run, options are:
- # :stacks=This will remove all files from the stacks that were shelved for the objects
- # :dor=This will delete objects from Fedora
- # :stage=This will delete the staged content in the assembly workspace
- # :symlinks=This will remove the symlink from the dor workspace
- # :workflows=This will remove the assemblyWF and accessoiningWF workflows for this object
- # * :dry_run => do not actually clean up (defaults to false)
- #
- # Example:
- # Assembly::Utils.cleanup(:druids=>['druid:aa000aa0001','druid:aa000aa0002'],:steps=>[:stacks,:dor,:stage,:symlinks,:workflows])
- def self.cleanup(params={})
-
- druids=params[:druids] || []
- steps=params[:steps] || []
- dry_run=params[:dry_run] || false
-
- allowed_steps={:stacks=>'This will remove all files from the stacks that were shelved for the objects',
- :dor=>'This will delete objects from Fedora',
- :stage=>"This will delete the staged content in #{Assembly::ASSEMBLY_WORKSPACE}",
- :symlinks=>"This will remove the symlink from #{Assembly::DOR_WORKSPACE}",
- :workflows=>"This will remove the accessionWF and assemblyWF workflows"}
- num_steps=0
+ # Cleanup a list of objects and associated files given a list of druids. WARNING: VERY DESTRUCTIVE.
+ # This method only works when this gem is used in a project that is configured to connect to DOR
+ #
+ # @param [Hash] params parameters specified as a hash, using symbols for options:
+ # * :druids => array of druids to cleanup
+ # * :steps => an array of steps, specified as symbols, indicating steps to be run, options are:
+ # :stacks=This will remove all files from the stacks that were shelved for the objects
+ # :dor=This will delete objects from Fedora
+ # :stage=This will delete the staged content in the assembly workspace
+ # :symlinks=This will remove the symlink from the dor workspace
+ # :workflows=This will remove the assemblyWF and accessoiningWF workflows for this object
+ # * :dry_run => do not actually clean up (defaults to false)
+ #
+ # Example:
+ # Assembly::Utils.cleanup(:druids=>['druid:aa000aa0001','druid:aa000aa0002'],:steps=>[:stacks,:dor,:stage,:symlinks,:workflows])
+ def self.cleanup(params = {})
+ druids = params[:druids] || []
+ steps = params[:steps] || []
+ dry_run = params[:dry_run] || false
- puts 'THIS IS A DRY RUN' if dry_run
+ allowed_steps = {:stacks => 'This will remove all files from the stacks that were shelved for the objects',
+ :dor => 'This will delete objects from Fedora',
+ :stage => "This will delete the staged content in #{Assembly::ASSEMBLY_WORKSPACE}",
+ :symlinks => "This will remove the symlink from #{Assembly::DOR_WORKSPACE}",
+ :workflows => 'This will remove the accessionWF and assemblyWF workflows'}
- Assembly::Utils.confirm "Run on '#{ENV['ROBOT_ENVIRONMENT']}'? Any response other than 'y' or 'yes' will stop the cleanup now."
- Assembly::Utils.confirm "Are you really sure you want to run on production? CLEANUP IS NOT REVERSIBLE" if ENV['ROBOT_ENVIRONMENT'] == 'production'
+ num_steps = 0
- steps.each do |step|
- if allowed_steps.keys.include?(step)
- Assembly::Utils.confirm "Run step '#{step}'? #{allowed_steps[step]}. Any response other than 'y' or 'yes' will stop the cleanup now."
- num_steps+=1 # count the valid steps found and agreed to
- end
- end
+ puts 'THIS IS A DRY RUN' if dry_run
- raise "no valid steps specified for cleanup" if num_steps == 0
- raise "no druids provided" if druids.size == 0
-
- druids.each {|pid| Assembly::Utils.cleanup_object(pid,steps,dry_run)}
+ Assembly::Utils.confirm "Run on '#{ENV['ROBOT_ENVIRONMENT']}'? Any response other than 'y' or 'yes' will stop the cleanup now."
+ Assembly::Utils.confirm 'Are you really sure you want to run on production? CLEANUP IS NOT REVERSIBLE' if ENV['ROBOT_ENVIRONMENT'] == 'production'
+ steps.each do |step|
+ if allowed_steps.keys.include?(step)
+ Assembly::Utils.confirm "Run step '#{step}'? #{allowed_steps[step]}. Any response other than 'y' or 'yes' will stop the cleanup now."
+ num_steps += 1 # count the valid steps found and agreed to
+ end
+ end
+ raise 'no valid steps specified for cleanup' if num_steps == 0
+ raise 'no druids provided' if druids.size == 0
+
+ druids.each {|pid| Assembly::Utils.cleanup_object(pid, steps, dry_run)}
end
- # Cleanup a single objects and associated files given a druid. WARNING: VERY DESTRUCTIVE.
+ # Cleanup a single objects and associated files given a druid. WARNING: VERY DESTRUCTIVE.
# This method only works when this gem is used in a project that is configured to connect to DOR
#
# @param [string] pid a druid
- # @param [array] steps an array of steps, options below
+ # @param [array] steps an array of steps, options below
# :stacks=This will remove all files from the stacks that were shelved for the objects
# :dor=This will delete objects from Fedora
# :stage=This will delete the staged content in the assembly workspace
# :symlinks=This will remove the symlink from the dor workspace
# :workflows=This will remove the assemblyWF and accessoiningWF workflows for this object
# @param [boolean] dry_run do not actually clean up (defaults to false)
#
# Example:
# Assembly::Utils.cleanup_object('druid:aa000aa0001',[:stacks,:dor,:stage,:symlinks,:workflows])
- def self.cleanup_object(pid,steps,dry_run=false)
- begin
- # start up an SSH session if we are going to try and remove content from the stacks
- ssh_session=Net::SSH.start(Dor::Config.stacks.host,Dor::Config.stacks.user, :auth_methods => %w(gssapi-with-mic publickey hostbased password keyboard-interactive)) if steps.include?(:stacks) && defined?(stacks_server)
-
- druid_tree=DruidTools::Druid.new(pid).tree
- puts "Cleaning up #{pid}"
- if steps.include?(:dor)
- puts "-- deleting #{pid} from Fedora #{ENV['ROBOT_ENVIRONMENT']}"
- Assembly::Utils.unregister(pid) unless dry_run
- end
- if steps.include?(:symlinks)
- path_to_symlinks=[]
- path_to_symlinks << File.join(Assembly::DOR_WORKSPACE,druid_tree)
- path_to_symlinks << Assembly::Utils.get_staging_path(pid,Assembly::DOR_WORKSPACE)
- path_to_symlinks.each do |path|
- if File::directory?(path)
- puts "-- deleting folder #{path} (WARNING: should have been a symlink)"
- FileUtils::rm_rf path unless dry_run
- elsif File.symlink?(path)
- puts "-- deleting symlink #{path}"
- File.delete(path) unless dry_run
- else
- puts "-- Skipping #{path}: not a folder or symlink"
- end
- end
- end
- if steps.include?(:stage)
- path_to_content=Assembly::Utils.get_staging_path(pid,Assembly::ASSEMBLY_WORKSPACE)
- puts "-- deleting folder #{path_to_content}"
- FileUtils.rm_rf path_to_content if !dry_run && File.exists?(path_to_content)
- end
- if steps.include?(:stacks)
- path_to_content= Dor::DigitalStacksService.stacks_storage_dir(pid)
- puts "-- removing files from the stacks on #{stacks_server} at #{path_to_content}"
- ssh_session.exec!("rm -fr #{path_to_content}") unless dry_run
- end
- if steps.include?(:workflows)
- puts "-- deleting #{pid} accessionWF and assemblyWF workflows from Fedora #{ENV['ROBOT_ENVIRONMENT']}"
- unless dry_run
- Dor::WorkflowService.delete_workflow('dor',pid,'accessionWF')
- Dor::WorkflowService.delete_workflow('dor',pid,'assemblyWF')
- end
- end
- rescue Exception => e
- puts "** cleaning up failed for #{pid} with #{e.message}"
- end
- ssh_session.close if ssh_session
+ def self.cleanup_object(pid, steps, dry_run = false)
+ # start up an SSH session if we are going to try and remove content from the stacks
+ ssh_session = Net::SSH.start(Dor::Config.stacks.host, Dor::Config.stacks.user, :auth_methods => %w(gssapi-with-mic publickey hostbased password keyboard-interactive)) if steps.include?(:stacks) && defined?(stacks_server)
+
+ druid_tree = DruidTools::Druid.new(pid).tree
+ puts "Cleaning up #{pid}"
+ if steps.include?(:dor)
+ puts "-- deleting #{pid} from Fedora #{ENV['ROBOT_ENVIRONMENT']}"
+ Assembly::Utils.unregister(pid) unless dry_run
+ end
+ if steps.include?(:symlinks)
+ path_to_symlinks = []
+ path_to_symlinks << File.join(Assembly::DOR_WORKSPACE, druid_tree)
+ path_to_symlinks << Assembly::Utils.get_staging_path(pid, Assembly::DOR_WORKSPACE)
+ path_to_symlinks.each do |path|
+ if File.directory?(path)
+ puts "-- deleting folder #{path} (WARNING: should have been a symlink)"
+ FileUtils.rm_rf path unless dry_run
+ elsif File.symlink?(path)
+ puts "-- deleting symlink #{path}"
+ File.delete(path) unless dry_run
+ else
+ puts "-- Skipping #{path}: not a folder or symlink"
+ end
+ end
+ end
+ if steps.include?(:stage)
+ path_to_content = Assembly::Utils.get_staging_path(pid, Assembly::ASSEMBLY_WORKSPACE)
+ puts "-- deleting folder #{path_to_content}"
+ FileUtils.rm_rf path_to_content if !dry_run && File.exist?(path_to_content)
+ end
+ if steps.include?(:stacks)
+ path_to_content = Dor::DigitalStacksService.stacks_storage_dir(pid)
+ puts "-- removing files from the stacks on #{stacks_server} at #{path_to_content}"
+ ssh_session.exec!("rm -fr #{path_to_content}") unless dry_run
+ end
+ if steps.include?(:workflows)
+ puts "-- deleting #{pid} accessionWF and assemblyWF workflows from Fedora #{ENV['ROBOT_ENVIRONMENT']}"
+ unless dry_run
+ Dor::WorkflowService.delete_workflow('dor', pid, 'accessionWF')
+ Dor::WorkflowService.delete_workflow('dor', pid, 'assemblyWF')
+ end
+ end
+ rescue Exception => e
+ puts "** cleaning up failed for #{pid} with #{e.message}"
+ ensure
+ ssh_session.close if ssh_session
end
-
+
# Delete an object from DOR.
# This method only works when this gem is used in a project that is configured to connect to DOR
#
- # @param [string] pid the druid
+ # @param [string] pid the druid
#
# Example:
# Assembly::Utils.delete_from_dor('druid:aa000aa0001')
def self.delete_from_dor(pid)
-
Dor::Config.fedora.client["objects/#{pid}"].delete
Dor::SearchService.solr.delete_by_id(pid)
Dor::SearchService.solr.commit
-
end
-
+
# Quicky update rights metadata for any existing list of objects using default rights metadata pulled from the supplied APO
#
# @param [array] druids - an array of druids
# @param [string] apo_druid - the druid of the APO to pull rights metadata from
# @param [boolean] publish - defaults to false, if true, will publish each object after replacing datastreams (must be run on server with rights to do this)
- #
+ #
# Example:
# druids=%w{druid:aa111aa1111 druid:bb222bb2222}
# apo_druid='druid:cc222cc2222'
- # Assembly::Utils.update_rights_metadata(druids,apo_druid)
- def self.update_rights_metadata(druids,apo_druid,publish=false)
+ # Assembly::Utils.update_rights_metadata(druids,apo_druid)
+ def self.update_rights_metadata(druids, apo_druid, publish = false)
apo = Dor::Item.find(apo_druid)
rights_md = apo.datastreams['defaultObjectRights']
- self.replace_datastreams(druids,'rightsMetadata',rights_md.content,publish)
+ replace_datastreams(druids, 'rightsMetadata', rights_md.content, publish)
end
-
- # Replace a specific datastream for a series of objects in DOR with new content
+
+ # Replace a specific datastream for a series of objects in DOR with new content
#
# @param [array] druids - an array of druids
# @param [string] datastream_name - the name of the datastream to replace
# @param [string] new_content - the new content to replace the entire datastream with
# @param [boolean] publish - defaults to false, if true, will publish each object after replacing datastreams (must be run on server with rights to do this)
@@ -340,158 +313,149 @@
# Example:
# druids=%w{druid:aa111aa1111 druid:bb222bb2222}
# new_content='<xml><more nodes>this should be the whole datastream</more nodes></xml>'
# datastream='rightsMetadata'
# Assembly::Utils.replace_datastreams(druids,datastream,new_content)
- def self.replace_datastreams(druids,datastream_name,new_content,publish=false)
+ def self.replace_datastreams(druids, datastream_name, new_content, publish = false)
druids.each do |druid|
obj = Dor::Item.find(druid)
- ds = obj.datastreams[datastream_name]
+ ds = obj.datastreams[datastream_name]
if ds
- ds.content = new_content
+ ds.content = new_content
ds.save
puts "replaced #{datastream_name} for #{druid}"
if publish
obj.publish_metadata
- puts "--object re-published"
+ puts '--object re-published'
end
else
- puts "#{datastream_name} does not exist for #{druid}"
+ puts "#{datastream_name} does not exist for #{druid}"
end
- end
- end
+ end
+ end
- # Republish a list of druids. Only works when run from a server with access rights to the stacks (e.g. lyberservices-prod)
+ # Republish a list of druids. Only works when run from a server with access rights to the stacks (e.g. lyberservices-prod)
#
# @param [array] druids - an array of druids
- #
- # Example:
- # druids=%w{druid:aa111aa1111 druid:bb222bb2222}
- # Assembly::Utils.republish(druids)
+ #
+ # Example:
+ # druids=%w{druid:aa111aa1111 druid:bb222bb2222}
+ # Assembly::Utils.republish(druids)
def self.republish(druids)
druids.each do |druid|
obj = Dor::Item.find(druid)
obj.publish_metadata
puts "republished #{druid}"
end
end
-
+
# Determines if the specifed APO object contains a specified workflow defined in it
# DEPRACATED NOW THAT REIFED WORKFLOWS ARE USED
# @param [string] druid - the druid of the APO to check
# @param [string] workflow - the name of the workflow to check
- #
+ #
# @return [boolean] if workflow is defined in APO
- #
+ #
# Example:
# Assembly::Utils.apo_workflow_defined?('druid:oo000oo0001','assembly')
# > true
- def self.apo_workflow_defined?(druid,workflow)
- puts "************WARNING - THIS METHOD MAY NOT BE USEFUL ANYMORE SINCE WORKFLOWS ARE NO LONGER DEFINED IN THE APO**************"
+ def self.apo_workflow_defined?(druid, workflow)
+ puts '************WARNING - THIS METHOD MAY NOT BE USEFUL ANYMORE SINCE WORKFLOWS ARE NO LONGER DEFINED IN THE APO**************'
obj = Dor::Item.find(druid)
raise 'object not an APO' if obj.identityMetadata.objectType.first != 'adminPolicy'
- xml_doc=Nokogiri::XML(obj.administrativeMetadata.content)
+ xml_doc = Nokogiri::XML(obj.administrativeMetadata.content)
xml_doc.xpath("//#{workflow}").size == 1 || xml_doc.xpath("//*[@id='#{workflow}']").size == 1
end
-
+
# Determines if the specifed object is an APO
# @param [string] druid - the druid of the APO to check
- #
+ #
# @return [boolean] if object exist and is an APO
- #
+ #
# Example:
# Assembly::Utils.is_apo?('druid:oo000oo0001')
# > true
def self.is_apo?(druid)
- begin
- obj = Dor::Item.find(druid)
- return obj.identityMetadata.objectType.first == 'adminPolicy'
- rescue
- return false
- end
+ obj = Dor::Item.find(druid)
+ obj.identityMetadata.objectType.first == 'adminPolicy'
+ rescue
+ return false
end
-
- # Update a specific datastream for a series of objects in DOR by searching and replacing content
+
+ # Update a specific datastream for a series of objects in DOR by searching and replacing content
#
# @param [array] druids - an array of druids
# @param [string] datastream_name - the name of the datastream to replace
# @param [string] find_content - the content to find
# @param [string] replace_content - the content to replace the found content with
- #
+ #
# Example:
- # druids=%w{druid:aa111aa1111 druid:bb222bb2222}
- # find_content='FooBarBaz'
- # replace_content='Stanford Rules'
- # datastream='rightsMetadata'
- # Assembly::Utils.update_datastreams(druids,datastream,find_content,replace_content)
- def self.update_datastreams(druids,datastream_name,find_content,replace_content)
+ # druids = %w{druid:aa111aa1111 druid:bb222bb2222}
+ # find_content = 'FooBarBaz'
+ # replace_content = 'Stanford Rules'
+ # datastream = 'rightsMetadata'
+ # Assembly::Utils.update_datastreams(druids, datastream, find_content, replace_content)
+ def self.update_datastreams(druids, datastream_name, find_content, replace_content)
druids.each do |druid|
obj = Dor::Item.find(druid)
ds = obj.datastreams[datastream_name]
if ds
- updated_content=ds.content.gsub(find_content,replace_content)
+ updated_content = ds.content.gsub(find_content, replace_content)
ds.content = updated_content
ds.save
puts "updated #{datastream_name} for #{druid}"
else
- puts "#{datastream_name} does not exist for #{druid}"
+ puts "#{datastream_name} does not exist for #{druid}"
end
- end
- end
+ end
+ end
# Unregister a DOR object, which includes deleting it and deleting all its workflows
- #
# @param [string] pid of druid
- #
- # @return [boolean] if deletion succeed or not
+ # @return [boolean] if deletion succeed or not
def self.unregister(pid)
-
- begin
- Assembly::Utils.delete_all_workflows pid
- Assembly::Utils.delete_from_dor pid
- return true
- rescue
- return false
- end
-
+ Assembly::Utils.delete_all_workflows pid
+ Assembly::Utils.delete_from_dor pid
+ true
+ rescue
+ return false
end
# Set the workflow step for the given PID to an error state
- #
# @param [string] pid of druid
# @param [string] step to set to error
#
def self.set_workflow_step_to_error(pid, step)
wf_name = Assembly::ASSEMBLY_WF
msg = 'Integration testing'
- params = ['dor', pid, wf_name, step, msg]
+ params = ['dor', pid, wf_name, step, msg]
resp = Dor::WorkflowService.update_workflow_error_status *params
- raise "update_workflow_error_status() returned false." unless resp == true
+ raise 'update_workflow_error_status() returned false.' unless resp == true
end
# Delete all workflows for the given PID. Destructive and should only be used when deleting an object from DOR.
# This method only works when this gem is used in a project that is configured to connect to DOR
#
# @param [string] pid of druid
# @param [String] repo repository dealing with the workflow. Default is 'dor'. Another option is 'sdr'
- # e.g.
+ # e.g.
# Assembly::Utils.delete_all_workflows('druid:oo000oo0001')
- def self.delete_all_workflows(pid, repo='dor')
- Dor::WorkflowService.get_workflows(pid).each {|workflow| Dor::WorkflowService.delete_workflow(repo,pid,workflow)}
+ def self.delete_all_workflows(pid, repo = 'dor')
+ Dor::WorkflowService.get_workflows(pid).each {|workflow| Dor::WorkflowService.delete_workflow(repo, pid, workflow)}
end
# Reindex the supplied PID in solr.
#
# @param [string] pid of druid
- # e.g.
- # Assembly::Utils.reindex('druid:oo000oo0001')
+ # e.g.
+ # Assembly::Utils.reindex('druid:oo000oo0001')
def self.reindex(pid)
obj = Dor.load_instance pid
solr_doc = obj.to_solr
- Dor::SearchService.solr.add(solr_doc, :add_attributes => {:commitWithin => 1000}) unless obj.nil?
+ Dor::SearchService.solr.add(solr_doc, :add_attributes => {:commitWithin => 1000}) unless obj.nil?
end
-
+
# Clear stray workflows - remove any workflow steps for orphaned objects.
# This method only works when this gem is used in a project that is configured to connect to DOR
def self.clear_stray_workflows
repo = 'dor'
wf = 'assemblyWF'
@@ -505,19 +469,18 @@
druids.each do |dru|
params = [repo, dru, wf, waiting, msg]
resp = wfs.update_workflow_error_status *params
puts "updated: resp=#{resp} params=#{params.inspect}"
end
- end
+ end
end
# Check if the object is fully accessioned and ingested.
# This method only works when this gem is used in a project that is configured to connect to the workflow service.
#
# @param [string] pid the druid to operate on
- #
- # @return [boolean] if object is fully ingested
+ # @return [boolean] if object is fully ingested
# Example:
# Assembly::Utils.is_ingested?('druid:oo000oo0001')
# > false
def self.is_ingested?(pid)
WFS.get_lifecycle(REPO, pid, 'accessioned') ? true : false
@@ -525,71 +488,66 @@
# Check if the object is currently in accessioning
# This method only works when this gem is used in a project that is configured to connect to the workflow service.
#
# @param [string] pid the druid to operate on
- #
# @return [boolean] if object is currently in accessioning
# Example:
# Assembly::Utils.in_accessioning?('druid:oo000oo0001')
# > false
def self.in_accessioning?(pid)
WFS.get_active_lifecycle(REPO, pid, 'submitted') ? true : false
end
-
+
# Check if the object is on ingest hold
# This method only works when this gem is used in a project that is configured to connect to the workflow service.
#
# @param [string] pid the druid to operate on
- #
# @return [boolean] if object is on ingest hold
# Example:
# Assembly::Utils.ingest_hold?('druid:oo000oo0001')
# > false
def self.ingest_hold?(pid)
- WFS.get_workflow_status(REPO, pid, 'accessionWF','sdr-ingest-transfer') == 'hold'
+ WFS.get_workflow_status(REPO, pid, 'accessionWF', 'sdr-ingest-transfer') == 'hold'
end
-
+
# Check if the object is submitted
# This method only works when this gem is used in a project that is configured to connect to the workflow service.
#
# @param [string] pid the druid to operate on
- #
- # @return [boolean] if object is submitted
+ # @return [boolean] if object is submitted
# Example:
# Assembly::Utils.is_submitted?('druid:oo000oo0001')
- # > false
+ # > false
def self.is_submitted?(pid)
- WFS.get_lifecycle(REPO, pid, 'submitted') == nil
+ WFS.get_lifecycle(REPO, pid, 'submitted').nil?
end
# Check if the updates are allowed on the object
# This method only works when this gem is used in a project that is configured to connect to the workflow service.
#
# @param [string] pid the druid to operate on
- #
- # @return [boolean] if object can be versioned and updated
+ # @return [boolean] if object can be versioned and updated
# Example:
# Assembly::Utils.updates_allowed?('druid:oo000oo0001')
- # > false
+ # > false
def self.updates_allowed?(pid)
!self.in_accessioning?(pid) && self.is_ingested?(pid)
end
# Check if versioning is required for the object
# This method only works when this gem is used in a project that is configured to connect to the workflow service.
#
# @param [string] pid the druid to operate on
- #
# @return [boolean] if object requires versioning
# Example:
# Assembly::Utils.versioning_required?('druid:oo000oo0001')
- # > false
+ # > false
def self.versioning_required?(pid)
- !((!self.is_ingested?(pid) && self.ingest_hold?(pid)) || (!self.is_ingested?(pid) && !self.is_submitted?(pid)))
+ !((!self.is_ingested?(pid) && self.ingest_hold?(pid)) || (!self.is_ingested?(pid) && !self.is_submitted?(pid)))
end
-
+
# Reset the workflow states for a list of druids given a list of workflow names and steps.
# Provide a list of druids in an array, and a hash containing workflow names (e.g. 'assemblyWF' or 'accessionWF') as the keys, and arrays of steps
# as the corresponding values (e.g. ['checksum-compute','jp2-create']) and they will all be reset to "waiting".
# This method only works when this gem is used in a project that is configured to connect to DOR
#
@@ -597,260 +555,238 @@
# * :druids => array of druids
# * :steps => a hash, containing workflow names as keys, and an array of steps
# * :state => a string for the name of the state to reset to, defaults to 'waiting' (could be 'completed' for example)
#
# Example:
- # druids=['druid:aa111aa1111','druid:bb222bb2222']
- # steps={'assemblyWF' => ['checksum-compute'],'accessionWF' => ['content-metadata','descriptive-metadata']}
- # Assembly::Utils.reset_workflow_states(:druids=>druids,:steps=>steps)
- def self.reset_workflow_states(params={})
- druids=params[:druids] || []
- workflows=params[:steps] || {}
- state=params[:state] || "waiting"
+ # druids = ['druid:aa111aa1111', 'druid:bb222bb2222']
+ # steps = {'assemblyWF' => ['checksum-compute'], 'accessionWF' => ['content-metadata', 'descriptive-metadata']}
+ # Assembly::Utils.reset_workflow_states(:druids => druids, :steps => steps)
+ def self.reset_workflow_states(params = {})
+ druids = params[:druids] || []
+ workflows = params[:steps] || {}
+ state = params[:state] || 'waiting'
druids.each do |druid|
- puts "** #{druid}"
- begin
- workflows.each do |workflow,steps|
- steps.each do |step|
- puts "Updating #{workflow}:#{step} to #{state}"
- Dor::WorkflowService.update_workflow_status 'dor',druid,workflow, step, state
- end
+ puts "** #{druid}"
+ begin
+ workflows.each do |workflow, steps|
+ steps.each do |step|
+ puts "Updating #{workflow}:#{step} to #{state}"
+ Dor::WorkflowService.update_workflow_status 'dor', druid, workflow, step, state
end
- rescue Exception => e
- puts "an error occurred trying to update workflows for #{druid} with message #{e.message}"
- end
+ end
+ rescue Exception => e
+ puts "an error occurred trying to update workflows for #{druid} with message #{e.message}"
+ end
end
end
# Get a list of druids from a CSV file which has a heading of "druid" and put them into a Ruby array.
# Useful if you want to import a report from argo
#
# @param [string] filename of CSV that has a column called "druid"
- #
# @return [array] array of druids
- #
# Example:
- # Assembly::Utils.read_druids_from_file('download.csv') # ['druid:xxxxx','druid:yyyyy']
+ # Assembly::Utils.read_druids_from_file('download.csv') # ['druid:xxxxx', 'druid:yyyyy']
def self.read_druids_from_file(csv_filename)
- rows=CsvMapper.import(csv_filename) do read_attributes_from_file end
- druids=[]
+ rows = CsvMapper.import(csv_filename) do read_attributes_from_file end
+ druids = []
rows.each do |row|
- druid=row.druid
- druid="druid:#{druid}" unless druid.include?('druid:')
+ druid = row.druid
+ druid = "druid:#{druid}" unless druid.include?('druid:')
druids << druid
end
- return druids
+ druids
end
-
+
# Get a list of druids that have errored out in a particular workflow and step
#
# @param [string] workflow name
# @param [string] step name
# @param [string] tag -- optional, if supplied, results will be filtered by the exact tag supplied; note this will dramatically slow down the response if there are many results
#
# @return [hash] hash of results, with key has a druid, and value as the error message
- # e.g.
+ # e.g.
# result=Assembly::Utils.get_errored_objects_for_workstep('accessionWF','content-metadata','Project : Revs')
# => {"druid:qd556jq0580"=>"druid:qd556jq0580 - Item error; caused by #<Rubydora::FedoraInvalidRequest: Error modifying datastream contentMetadata for druid:qd556jq0580. See logger for details>"}
- def self.get_errored_objects_for_workstep workflow, step, tag = ''
- result=Dor::WorkflowService.get_errored_objects_for_workstep workflow,step,'dor'
- if tag == ''
- return result
- else
- filtered_result={}
- result.each do |druid,error|
- begin
- item=Dor::Item.find(druid)
- filtered_result.merge!(druid=>error) if item.tags.include? tag
- rescue
- end
+ def self.get_errored_objects_for_workstep(workflow, step, tag = '')
+ result = Dor::WorkflowService.get_errored_objects_for_workstep workflow, step, 'dor'
+ return result if tag == ''
+ filtered_result = {}
+ result.each do |druid, error|
+ begin
+ item = Dor::Item.find(druid)
+ filtered_result.merge!(druid => error) if item.tags.include? tag
+ rescue
end
- return filtered_result
end
+ filtered_result
end
# Reset any objects in a specific workflow step and state that have errored out back to waiting
#
# @param [string] workflow name
# @param [string] step name
# @param [string] tag -- optional, if supplied, results will be filtered by the exact tag supplied; note this will dramatically slow down the response if there are many results
#
# @return [hash] hash of results that have been reset, with key has a druid, and value as the error message
- # e.g.
- # result=Assembly::Utils.reset_errored_objects_for_workstep('accessionWF','content-metadata')
- # => {"druid:qd556jq0580"=>"druid:qd556jq0580 - Item error; caused by #<Rubydora::FedoraInvalidRequest: Error modifying datastream contentMetadata for druid:qd556jq0580. See logger for details>"}
- def self.reset_errored_objects_for_workstep workflow, step, tag=''
- result=self.get_errored_objects_for_workstep workflow,step,tag
- druids=[]
- result.each {|k,v| druids << k}
- self.reset_workflow_states(:druids=>druids,:steps=>{workflow=>[step]}) if druids.size > 0
- return result
+ # e.g.
+ # result = Assembly::Utils.reset_errored_objects_for_workstep('accessionWF', 'content-metadata')
+ # => {"druid:qd556jq0580"=>"druid:qd556jq0580 - Item error; caused by #<Rubydora::FedoraInvalidRequest: Error modifying datastream contentMetadata for druid:qd556jq0580. See logger for details>"}
+ def self.reset_errored_objects_for_workstep(workflow, step, tag = '')
+ result = get_errored_objects_for_workstep workflow, step, tag
+ druids = []
+ result.each {|k, v| druids << k}
+ reset_workflow_states(:druids => druids, :steps => {workflow => [step]}) if druids.size > 0
+ result
end
# Read in a list of druids from a pre-assembly progress load file and load into an array.
#
# @param [string] progress_log_file filename
# @param [boolean] completed if true, returns druids that have completed, if false, returns druids that failed (defaults to true)
#
# @return [array] list of druids
#
- # Example:
- # druids=Assembly::Utils.get_druids_from_log('/dor/preassembly/sohp_accession_log.yaml')
+ # Example:
+ # druids = Assembly::Utils.get_druids_from_log '/dor/preassembly/sohp_accession_log.yaml'
# puts druids
- # > ['aa000aa0001','aa000aa0002']
- def self.get_druids_from_log(progress_log_file,completed=true)
- druids=[]
+ # > ['aa000aa0001', 'aa000aa0002']
+ def self.get_druids_from_log(progress_log_file, completed = true)
+ druids = []
docs = YAML.load_stream(Assembly::Utils.read_file(progress_log_file))
docs = docs.documents if docs.respond_to? :documents
- docs.each { |obj| druids << obj[:pid] if obj[:pre_assem_finished] == completed}
- return druids
+ docs.each { |obj| druids << obj[:pid] if obj[:pre_assem_finished] == completed}
+ druids
end
# Read in a YAML configuration file from disk and return a hash
#
# @param [string] filename of YAML config file to read
- #
# @return [hash] configuration contents as a hash
#
# Example:
# config_filename='/thumpers/dpgthumper2-smpl/SC1017_SOHP/sohp_prod_accession.yaml'
- # config=Assembly::Utils.load_config(config_filename)
+ # config=Assembly::Utils.load_config(config_filename)
# puts config['progress_log_file']
- # > "/dor/preassembly/sohp_accession_log.yaml"
+ # > "/dor/preassembly/sohp_accession_log.yaml"
def self.load_config(filename)
- YAML.load(Assembly::Utils.read_file(filename))
+ YAML.load(Assembly::Utils.read_file(filename))
end
# Read in a file from disk
#
# @param [string] filename to read
- #
# @return [string] file contents as a string
def self.read_file(filename)
- return File.readable?(filename) ? IO.read(filename) : ''
+ File.readable?(filename) ? IO.read(filename) : ''
end
- # Used by the completion_report and project_tag_report in the pre-assembly project
+ # Used by the completion_report and project_tag_report in the pre-assembly project
#
# @param [solr_document] doc a solr document result
# @param [boolean] check_status_in_dor indicates if we should check for the workflow states in dor or trust SOLR is up to date (defaults to false)
#
# @return [string] a comma delimited row for the report
- def self.solr_doc_parser(doc,check_status_in_dor=false)
-
+ def self.solr_doc_parser(doc, check_status_in_dor = false)
druid = doc[:id]
if Solrizer::VERSION < '3.0'
label = doc[:objectLabel_t]
- title=doc[:public_dc_title_t].nil? ? '' : doc[:public_dc_title_t].first
+ title = doc[:public_dc_title_t].nil? ? '' : doc[:public_dc_title_t].first
if check_status_in_dor
- accessioned = self.get_workflow_status(druid,'accessionWF','publish')=="completed"
- shelved = self.get_workflow_status(druid,'accessionWF','shelve')=="completed"
+ accessioned = get_workflow_status(druid, 'accessionWF', 'publish') == 'completed'
+ shelved = get_workflow_status(druid, 'accessionWF', 'shelve') == 'completed'
else
- accessioned = doc[:wf_wps_facet].nil? ? false : doc[:wf_wps_facet].include?("accessionWF:publish:completed")
- shelved = doc[:wf_wps_facet].nil? ? false : doc[:wf_wps_facet].include?("accessionWF:shelve:completed")
+ accessioned = doc[:wf_wps_facet].nil? ? false : doc[:wf_wps_facet].include?('accessionWF:publish:completed')
+ shelved = doc[:wf_wps_facet].nil? ? false : doc[:wf_wps_facet].include?('accessionWF:shelve:completed')
end
source_id = doc[:source_id_t]
- files=doc[:content_file_t]
+ files = doc[:content_file_t]
else
label = doc[Solrizer.solr_name('objectLabel', :displayable)]
title = doc.fetch(Solrizer.solr_name('public_dc_title', :displayable), []).first || ''
if check_status_in_dor
- accessioned = self.get_workflow_status(druid,'accessionWF','publish')=="completed"
- shelved = self.get_workflow_status(druid,'accessionWF','shelve')=="completed"
+ accessioned = get_workflow_status(druid, 'accessionWF', 'publish') == 'completed'
+ shelved = get_workflow_status(druid, 'accessionWF', 'shelve') == 'completed'
else
- accessioned = doc.fetch(Solrizer.solr_name('wf_wps', :symbol), []).include?("accessionWF:publish:completed")
- shelved = doc.fetch(Solrizer.solr_name('wf_wps', :symbol), []).include?("accessionWF:shelve:completed")
+ accessioned = doc.fetch(Solrizer.solr_name('wf_wps', :symbol), []).include?('accessionWF:publish:completed')
+ shelved = doc.fetch(Solrizer.solr_name('wf_wps', :symbol), []).include?('accessionWF:shelve:completed')
end
source_id = doc[Solrizer.solr_name('source_id', :symbol)]
- files=doc[Solrizer.solr_name('content_file', :symbol)]
+ files = doc[Solrizer.solr_name('content_file', :symbol)]
+ end
+ if files.nil?
+ file_type_list = ''
+ num_files = 0
+ else
+ num_files = files.size
+ # count the amount of each file type
+ file_types = Hash.new(0)
+ unless num_files == 0
+ files.each {|file| file_types[File.extname(file)] += 1}
+ file_type_list = file_types.map{|k, v| "#{k}=#{v}"}.join(' | ')
+ end
end
- if files.nil?
- file_type_list=""
- num_files=0
- else
- num_files = files.size
- # count the amount of each file type
- file_types=Hash.new(0)
- unless num_files == 0
- files.each {|file| file_types[File.extname(file)]+=1}
- file_type_list=file_types.map{|k,v| "#{k}=#{v}"}.join(' | ')
- end
- end
-
- purl_link = ""
- val = druid.split(/:/).last
- purl_link = File.join(Assembly::PURL_BASE_URL, val)
-
- return [druid, label, title, source_id, accessioned, shelved, purl_link, num_files,file_type_list]
-
+ val = druid.split(/:/).last
+ purl_link = File.join(Assembly::PURL_BASE_URL, val)
+ [druid, label, title, source_id, accessioned, shelved, purl_link, num_files, file_type_list]
end
# Takes a hash data structure and recursively converts all hash keys from strings to symbols.
#
# @param [hash] h hash
- #
# @return [hash] a hash with all keys converted from strings to symbols
#
# Example:
- # Assembly::Utils.symbolize_keys({'dude'=>'is cool','i'=>'am too'})
- # > {:dude=>"is cool", :i=>"am too"}
+ # Assembly::Utils.symbolize_keys({'dude' => 'is cool', 'i' => 'am too'})
+ # > {:dude => "is cool", :i => "am too"}
def self.symbolize_keys(h)
if h.instance_of? Hash
- h.inject({}) { |hh,(k,v)| hh[k.to_sym] = symbolize_keys(v); hh }
+ h.inject({}) { |hh, (k, v)| hh[k.to_sym] = symbolize_keys(v); hh }
elsif h.instance_of? Array
h.map { |v| symbolize_keys(v) }
else
h
end
end
# Takes a hash and converts its string values to symbols -- not recursively.
- #
# @param [hash] h hash
- #
- # @return [hash] a hash with all keys converted from strings to symbols
- #
+ # @return [hash] a hash with all values converted from strings to symbols
# Example:
- # Assembly::Utils.values_to_symbols!({'dude'=>'iscool','i'=>'amtoo'})
- # > {"i"=>:amtoo, "dude"=>:iscool}
+ # Assembly::Utils.values_to_symbols!({'dude' => 'iscool', 'i' => 'amtoo'})
+ # > {'i' => :amtoo, 'dude' => :iscool}
def self.values_to_symbols!(h)
- h.each { |k,v| h[k] = v.to_sym if v.class == String }
- end
+ h.each { |k, v| h[k] = v.to_sym if v.class == String }
+ end
# Removes any duplicate tags within each druid
- #
# @param [array] druids - an array of druids
def self.remove_duplicate_tags(druids)
druids.each do |druid|
i = Dor::Item.find(druid)
- if i and i.tags.size > 1 # multiple tags
- i.tags.each do |tag|
- if (i.tags.select {|t| t == tag}).size > 1 # tag is duplicate
- i.remove_tag(tag)
- i.add_tag(tag)
- puts "Saving #{druid} to remove duplicate tag='#{tag}'"
- i.save
- end
- end
+ next unless i && i.tags.size > 1 # multiple tags
+ i.tags.each do |tag|
+ next unless (i.tags.select {|t| t == tag}).size > 1 # tag is duplicate
+ i.remove_tag(tag)
+ i.add_tag(tag)
+ puts "Saving #{druid} to remove duplicate tag='#{tag}'"
+ i.save
end
end
end
- private
- # Used by the cleanup to ask user for confirmation of each step. Any response other than 'yes' results in the raising of an error
- #
+ # Used by the cleanup to ask user for confirmation of each step. Any response other than 'yes' raises an error
# @param [string] message the message to show to a user
#
def self.confirm(message)
puts message
- response=gets.chomp.downcase
- raise "Exiting" if response != 'y' && response != 'yes'
+ response = gets.chomp.downcase
+ raise 'Exiting' if response != 'y' && response != 'yes'
end
end
-
end