app/jobs/bulkrax/create_relationships_job.rb in bulkrax-5.0.0 vs app/jobs/bulkrax/create_relationships_job.rb in bulkrax-5.1.0

- old
+ new

@@ -15,99 +15,115 @@ # NOTE: In the context of this job, "record" is used to generically refer # to either an instance of a Work or an instance of a Collection. # NOTE: In the context of this job, "identifier" is used to generically refer # to either a record's ID or a Bulkrax::Entry's source_identifier. class CreateRelationshipsJob < ApplicationJob + ## + # @api public + # @since v5.0.1 + # + # Once we've created the relationships, should we then index the works' file_sets to ensure + # that we have the proper indexed values? This can help set things like `is_page_of_ssim` for + # IIIF manifest and search results of file sets. + # + # @note As of v5.0.1 the default behavior is to not perform this. That preserves past + # implementations. However, we might determine that we want to change the default + # behavior, which would likely mean a major version change. + # + # @example + # # In config/initializers/bulkrax.rb + # Bulkrax::CreateRelationshipsJob.update_child_records_works_file_sets = true + # + # @see https://github.com/scientist-softserv/louisville-hyku/commit/128a9ef + class_attribute :update_child_records_works_file_sets, default: false + include DynamicRecordLookup queue_as :import - attr_accessor :child_records, :child_entry, :parent_record, :parent_entry, :importer_run_id - # @param parent_identifier [String] Work/Collection ID or Bulkrax::Entry source_identifiers # @param importer_run_id [Integer] ID of the current Bulkrax::ImporterRun (needed to properly update counters) # # The entry_identifier is used to lookup the @base_entry for the job (a.k.a. the entry the job was called from). # The @base_entry defines the context of the relationship (e.g. "this entry (@base_entry) should have a parent"). # Whether the @base_entry is the parent or the child in the relationship is determined by the presence of a # parent_identifier or child_identifier param. 
For example, if a parent_identifier is passed, we know @base_entry # is the child in the relationship, and vice versa if a child_identifier is passed. + # + # rubocop:disable Metrics/MethodLength def perform(parent_identifier:, importer_run_id:) # rubocop:disable Metrics/AbcSize - pending_relationships = Bulkrax::PendingRelationship.find_each.select do |rel| - rel.importer_run_id == importer_run_id && rel.parent_id == parent_identifier - end.sort_by(&:order) + importer_run = Bulkrax::ImporterRun.find(importer_run_id) + ability = Ability.new(importer_run.user) - @importer_run_id = importer_run_id - @parent_entry, @parent_record = find_record(parent_identifier, importer_run_id) - @child_records = { works: [], collections: [] } - pending_relationships.each do |rel| - raise ::StandardError, %("#{rel}" needs either a child or a parent to create a relationship) if rel.child_id.nil? || rel.parent_id.nil? - @child_entry, child_record = find_record(rel.child_id, importer_run_id) - if child_record - child_record.is_a?(::Collection) ? @child_records[:collections] << child_record : @child_records[:works] << child_record + parent_entry, parent_record = find_record(parent_identifier, importer_run_id) + + number_of_successes = 0 + number_of_failures = 0 + errors = [] + + ActiveRecord::Base.uncached do + Bulkrax::PendingRelationship.where(parent_id: parent_identifier, importer_run_id: importer_run_id) + .ordered.find_each do |rel| + process(relationship: rel, importer_run_id: importer_run_id, parent_record: parent_record, ability: ability) + number_of_successes += 1 + rescue => e + number_of_failures += 1 + errors << e end end - if (child_records[:collections].blank? && child_records[:works].blank?) || parent_record.nil? + # save record if members were added + parent_record.save! if @parent_record_members_added + + # rubocop:disable Rails/SkipsModelValidations + if errors.present? 
+ importer_run.increment!(:failed_relationships, number_of_failures) + parent_entry&.set_status_info(errors.last, importer_run) + + # TODO: This can create an infinite job cycle, consider a time to live tracker. reschedule({ parent_identifier: parent_identifier, importer_run_id: importer_run_id }) return false # stop current job from continuing to run after rescheduling + else + Bulkrax::ImporterRun.find(importer_run_id).increment!(:processed_relationships, number_of_successes) end - @parent_entry ||= Bulkrax::Entry.where(identifier: parent_identifier, - importerexporter_id: ImporterRun.find(importer_run_id).importer_id, - importerexporter_type: "Bulkrax::Importer").first - create_relationships - pending_relationships.each(&:destroy) - rescue ::StandardError => e - parent_entry ? parent_entry.status_info(e) : child_entry.status_info(e) - Bulkrax::ImporterRun.find(importer_run_id).increment!(:failed_relationships) # rubocop:disable Rails/SkipsModelValidations + # rubocop:enable Rails/SkipsModelValidations end + # rubocop:enable Metrics/MethodLength private - def create_relationships - if parent_record.is_a?(::Collection) - collection_parent_work_child unless child_records[:works].empty? - collection_parent_collection_child unless child_records[:collections].empty? - else - work_parent_work_child unless child_records[:works].empty? - raise ::StandardError, 'a Collection may not be assigned as a child of a Work' if child_records[:collections].present? - end - end + def process(relationship:, importer_run_id:, parent_record:, ability:) + raise "#{relationship} needs a child to create relationship" if relationship.child_id.nil? + raise "#{relationship} needs a parent to create relationship" if relationship.parent_id.nil? 
- def user - @user ||= Bulkrax::ImporterRun.find(importer_run_id).importer.user - end + _child_entry, child_record = find_record(relationship.child_id, importer_run_id) + raise "#{relationship} could not find child record" unless child_record - # Work-Collection membership is added to the child as member_of_collection_ids - # This is adding the reverse relationship, from the child to the parent - def collection_parent_work_child - child_work_ids = child_records[:works].map(&:id) - parent_record.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX) + raise "Cannot add child collection (ID=#{relationship.child_id}) to parent work (ID=#{relationship.parent_id})" if child_record.collection? && parent_record.work? - parent_record.add_member_objects(child_work_ids) - ImporterRun.find(importer_run_id).increment!(:processed_relationships, child_work_ids.count) # rubocop:disable Rails/SkipsModelValidations + ability.authorize!(:edit, child_record) + + # We could do this outside of the loop, but that could lead to odd counter failures. + ability.authorize!(:edit, parent_record) + + parent_record.is_a?(Collection) ? add_to_collection(child_record, parent_record) : add_to_work(child_record, parent_record) + + child_record.file_sets.each(&:update_index) if update_child_records_works_file_sets? && child_record.respond_to?(:file_sets) + relationship.destroy end - # Collection-Collection membership is added to the as member_ids - def collection_parent_collection_child - child_records[:collections].each do |child_record| - ::Hyrax::Collections::NestedCollectionPersistenceService.persist_nested_collection_for(parent: parent_record, child: child_record) - ImporterRun.find(importer_run_id).increment!(:processed_relationships) # rubocop:disable Rails/SkipsModelValidations - end + def add_to_collection(child_record, parent_record) + child_record.member_of_collections << parent_record + child_record.save! 
end - # Work-Work membership is added to the parent as member_ids - def work_parent_work_child - records_hash = {} - child_records[:works].each_with_index do |child_record, i| - records_hash[i] = { id: child_record.id } - end - attrs = { work_members_attributes: records_hash } - parent_record.try(:reindex_extent=, Hyrax::Adapters::NestingIndexAdapter::LIMITED_REINDEX) - env = Hyrax::Actors::Environment.new(parent_record, Ability.new(user), attrs) + def add_to_work(child_record, parent_record) + return true if parent_record.ordered_members.to_a.include?(child_record) - Hyrax::CurationConcern.actor.update(env) - ImporterRun.find(importer_run_id).increment!(:processed_relationships, child_records[:works].count) # rubocop:disable Rails/SkipsModelValidations + parent_record.ordered_members << child_record + @parent_record_members_added = true + # TODO: Do we need to save the child record? + child_record.save! end def reschedule(parent_identifier:, importer_run_id:) CreateRelationshipsJob.set(wait: 10.minutes).perform_later( parent_identifier: parent_identifier,