app/parsers/bulkrax/application_parser.rb in bulkrax-4.3.1 vs app/parsers/bulkrax/application_parser.rb in bulkrax-4.4.0

- old
+ new

@@ -1,79 +1,110 @@ # frozen_string_literal: true require 'zip' module Bulkrax + # An abstract class that establishes the API for Bulkrax's import and export parsing. + # + # @abstract Subclass the Bulkrax::ApplicationParser to create a parser that handles a specific format (e.g. CSV, Bagit, XML, etc). class ApplicationParser # rubocop:disable Metrics/ClassLength attr_accessor :importerexporter, :headers alias importer importerexporter alias exporter importerexporter delegate :only_updates, :limit, :current_run, :errors, :mapping, :seen, :increment_counters, :parser_fields, :user, :keys_without_numbers, :key_without_numbers, :status, :status_info, :status_at, :exporter_export_path, :exporter_export_zip_path, :importer_unzip_path, :validate_only, to: :importerexporter + # @todo Convert to `class_attribute :parser_fiels, default: {}` def self.parser_fields {} end + # @return [TrueClass,FalseClass] this parser does or does not support exports. + # + # @todo Convert to `class_attribute :export_supported, default: false, instance_predicate: true` and `self << class; alias export_supported? export_supported; end` def self.export_supported? false end + # @return [TrueClass,FalseClass] this parser does or does not support imports. + # + # @todo Convert to `class_attribute :import_supported, default: false, instance_predicate: true` and `self << class; alias import_supported? import_supported; end` def self.import_supported? true end def initialize(importerexporter) @importerexporter = importerexporter @headers = [] end - # @api + # @api public + # @abstract Subclass and override {#entry_class} to implement behavior for the parser. def entry_class - raise StandardError, 'must be defined' + raise NotImplementedError, 'must be defined' end - # @api + # @api public + # @abstract Subclass and override {#collection_entry_class} to implement behavior for the parser. def collection_entry_class - raise StandardError, 'must be defined' + raise NotImplementedError, 'must be defined' end - # @api + # @api public + # @abstract Subclass and override {#records} to implement behavior for the parser. def records(_opts = {}) - raise StandardError, 'must be defined' + raise NotImplementedError, 'must be defined' end + # @return [Symbol] the name of the identifying property in the source system from which we're + # importing (e.g. is *not* this application that mounts *this* Bulkrax engine). + # + # @see #work_identifier + # @see https://github.com/samvera-labs/bulkrax/wiki/CSV-Importer#source-identifier Bulkrax Wiki regarding source identifier def source_identifier @source_identifier ||= get_field_mapping_hash_for('source_identifier')&.values&.first&.[]('from')&.first&.to_sym || :source_identifier end + # @return [Symbol] the name of the identifying property for the system which we're importing + # into (e.g. the application that mounts *this* Bulkrax engine) + # @see #source_identifier def work_identifier @work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source end + # @return [String] def generated_metadata_mapping @generated_metadata_mapping ||= 'generated' end + # @return [String, NilClass] + # @see #related_parents_raw_mapping def related_parents_raw_mapping @related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first end + # @return [String] + # @see #related_parents_field_mapping def related_parents_parsed_mapping @related_parents_parsed_mapping ||= (get_field_mapping_hash_for('related_parents_field_mapping')&.keys&.first || 'parents') end + # @return [String, NilClass] + # @see #related_children_parsed_mapping def related_children_raw_mapping @related_children_raw_mapping ||= get_field_mapping_hash_for('related_children_field_mapping')&.values&.first&.[]('from')&.first end + # @return [String] + # @see #related_children_raw_mapping def related_children_parsed_mapping @related_children_parsed_mapping ||= (get_field_mapping_hash_for('related_children_field_mapping')&.keys&.first || 'children') end + # @api private def get_field_mapping_hash_for(key) return instance_variable_get("@#{key}_hash") if instance_variable_get("@#{key}_hash").present? mapping = importerexporter.field_mapping.is_a?(Hash) ? importerexporter.field_mapping : {} instance_variable_set( @@ -83,101 +114,138 @@ raise StandardError, "more than one #{key} declared: #{instance_variable_get("@#{key}_hash").keys.join(', ')}" if instance_variable_get("@#{key}_hash").length > 1 instance_variable_get("@#{key}_hash") end + # @return [Array<String>] def model_field_mappings model_mappings = Bulkrax.field_mappings[self.class.to_s]&.dig('model', :from) || [] model_mappings |= ['model'] model_mappings end + # @return [String] def perform_method if self.validate_only 'perform_now' else 'perform_later' end end + # The visibility of the record. Acceptable values are: "open", "embaro", "lease", "authenticated", "restricted". The default is "open" + # + # @return [String] + # @see https://github.com/samvera/hydra-head/blob/main/hydra-access-controls/app/models/concerns/hydra/access_controls/access_right.rb Hydra::AccessControls::AccessRight for details on the range of values. + # @see https://github.com/samvera/hyrax/blob/bd2bcffc33e183904be2c175367648815f25bc2b/app/services/hyrax/visibility_intention.rb Hyrax::VisibilityIntention for how we process the visibility. def visibility @visibility ||= self.parser_fields['visibility'] || 'open' end + # @api public + # + # @param types [Array<Symbol>] the types of objects that we'll create. + # + # @see Bulkrax::Importer::DEFAULT_OBJECT_TYPES + # @see #create_collections + # @see #create_works + # @see #create_file_sets + # @see #create_relationships + def create_objects(types = []) + types.each do |object_type| + parser.send("create_#{object_type.pluralize}") + end + end + + # @abstract Subclass and override {#create_collections} to implement behavior for the parser. def create_collections - raise StandardError, 'must be defined' if importer? + raise NotImplementedError, 'must be defined' if importer? end + # @abstract Subclass and override {#create_works} to implement behavior for the parser. def create_works - raise StandardError, 'must be defined' if importer? + raise NotImplementedError, 'must be defined' if importer? end + # @abstract Subclass and override {#create_file_sets} to implement behavior for the parser. def create_file_sets - raise StandardError, 'must be defined' if importer? + raise NotImplementedError, 'must be defined' if importer? end + # @abstract Subclass and override {#create_relationships} to implement behavior for the parser. def create_relationships - raise StandardError, 'must be defined' if importer? + raise NotImplementedError, 'must be defined' if importer? end # Optional, define if using browse everything for file upload def retrieve_cloud_files(files); end + # @param file [#path, #original_filename] the file object that with the relevant data for the + # import. def write_import_file(file) path = File.join(path_for_import, file.original_filename) FileUtils.mv( file.path, path ) path end # Base path for imported and exported files + # @param [String] + # @return [String] the base path for files that this parser will "parse" def base_path(type = 'import') # account for multiple versions of hyku is_multitenant = ENV['HYKU_MULTITENANT'] == 'true' || ENV['SETTINGS__MULTITENANCY__ENABLED'] == 'true' is_multitenant ? File.join(Bulkrax.send("#{type}_path"), ::Site.instance.account.name) : Bulkrax.send("#{type}_path") end # Path where we'll store the import metadata and files # this is used for uploaded and cloud files + # @return [String] def path_for_import @path_for_import = File.join(base_path, importerexporter.path_string) FileUtils.mkdir_p(@path_for_import) unless File.exist?(@path_for_import) @path_for_import end + # @abstract Subclass and override {#setup_export_file} to implement behavior for the parser. def setup_export_file - raise StandardError, 'must be defined' if exporter? + raise NotImplementedError, 'must be defined' if exporter? end + # @abstract Subclass and override {#write_files} to implement behavior for the parser. def write_files - raise StandardError, 'must be defined' if exporter? + raise NotImplementedError, 'must be defined' if exporter? end + # @return [TrueClass,FalseClass] def importer? importerexporter.is_a?(Bulkrax::Importer) end + # @return [TrueClass,FalseClass] def exporter? importerexporter.is_a?(Bulkrax::Exporter) end # @param limit [Integer] limit set on the importerexporter # @param index [Integer] index of current iteration - # @return [boolean] + # @return [TrueClass,FalseClass] def limit_reached?(limit, index) return false if limit.nil? || limit.zero? # no limit index >= limit end # Override to add specific validations + # @return [TrueClass,FalseClass] def valid_import? true end + # @return [TrueClass,FalseClass] def record_has_source_identifier(record, index) if record[source_identifier].blank? if Bulkrax.fill_in_blank_source_identifiers.present? record[source_identifier] = Bulkrax.fill_in_blank_source_identifiers.call(self, index) else @@ -197,10 +265,11 @@ ImporterRun.find(current_run.id).increment!(:failed_records) ImporterRun.find(current_run.id).decrement!(:enqueued_records) unless ImporterRun.find(current_run.id).enqueued_records <= 0 # rubocop:disable Style/IdenticalConditionalBranches end # rubocop:enable Rails/SkipsModelValidations + # @return [Array<String>] def required_elements if Bulkrax.fill_in_blank_source_identifiers ['title'] else ['title', source_identifier] @@ -285,15 +354,17 @@ def zip? parser_fields&.[]('import_file_path') && MIME::Types.type_for(parser_fields['import_file_path']).include?('application/zip') end # Path for the import + # @return [String] def import_file_path @import_file_path ||= real_import_file_path end private + # @return [String] def real_import_file_path return importer_unzip_path if file? && zip? parser_fields['import_file_path'] end end