# frozen_string_literal: true require "bulkrax/version" require "bulkrax/engine" require 'active_support/all' require 'coderay' require 'csv' require 'denormalize_fields' require 'erb' require 'iso8601' require 'language_list' require 'marcel' require 'nokogiri' require 'ostruct' require 'zip' def conditional_require(gem_name) require gem_name rescue LoadError ENV["BULKRAX_NO_#{gem_name.upcase}"] = 'true' end conditional_require 'bagit' conditional_require 'rdf' # rubocop:disable Metrics/ModuleLength module Bulkrax extend self # rubocop:disable Style/ModuleFunction extend Forwardable ## # @api public class Configuration attr_accessor :api_definition, :default_field_mapping, :default_work_type, :export_path, :field_mappings, :generated_metadata_mapping, :import_path, :multi_value_element_join_on, :multi_value_element_split_on, :object_factory, :parsers, :qa_controlled_properties, :related_children_field_mapping, :related_parents_field_mapping, :relationship_job_class, :removed_image_path, :required_elements, :reserved_properties, :server_name ## # @return [#call] with arity 2. The first parameter is a {Bulkrax::ApplicationParser} and the # second parameter is an Integer for the index of the record encountered in the import. attr_accessor :fill_in_blank_source_identifiers ## # @param [String] attr_writer :solr_key_for_member_file_ids ## # @return [String] # @see https://github.com/samvera/hyrax/pull/6513 def solr_key_for_member_file_ids return @solr_key_for_member_file_ids if @solr_key_for_member_file_ids.present? return "member_ids_ssim" if defined?(Hyrax) "#{file_model_class.name.to_s.underscore}_ids_ssim" end ## # @param coercer [#call] # @see Bulkrax::FactoryClassFinder attr_writer :factory_class_name_coercer ## # A function responsible for converting the name of a factory class to the corresponding # constant. # # @return [#call, Bulkrax::FactoryClassFinder::DefaultCoercer] an object responding to call, # with one positional parameter (e.g. arity == 1) # # @example # Bulkrax.factory_class_name_coercer.call("Work") # => Work def factory_class_name_coercer @factory_class_name_coercer || Bulkrax::FactoryClassFinder::DefaultCoercer end def collection_model_class @collection_model_class ||= Collection if defined?(::Hyrax) end attr_writer :collection_model_class def collection_model_internal_resource # WARN: Using #try on :internal_resource can yield unexpected results. # If the method is undefined, it can return a truthy value instead of # the typical nil. # # E.g. # ```ruby # Hyrax::FileSet.try(:internal_resource) || 'hi' # => # # # { "contributor" => { from: ["contributor"] }, # no appropriate mapping for coverage (based_near needs id) # ""=>{:from=>["coverage"]}, "creator" => { from: ["creator"] }, "date_created" => { from: ["date"] }, "description" => { from: ["description"] }, # no appropriate mapping for format # ""=>{:from=>["format"]}, "identifier" => { from: ["identifier"] }, "language" => { from: ["language"], parsed: true }, "publisher" => { from: ["publisher"] }, "related_url" => { from: ["relation"] }, "rights_statement" => { from: ["rights"] }, "source" => { from: ["source"] }, "subject" => { from: ["subject"], parsed: true }, "title" => { from: ["title"] }, "resource_type" => { from: ["type"], parsed: true }, "remote_files" => { from: ["thumbnail_url"], parsed: true } }, "Bulkrax::OaiQualifiedDcParser" => { "abstract" => { from: ["abstract"] }, "alternative_title" => { from: ["alternative"] }, "bibliographic_citation" => { from: ["bibliographicCitation"] }, "contributor" => { from: ["contributor"] }, "creator" => { from: ["creator"] }, "date_created" => { from: ["created"] }, "description" => { from: ["description"] }, "language" => { from: ["language"] }, "license" => { from: ["license"] }, "publisher" => { from: ["publisher"] }, "related_url" => { from: ["relation"] }, "rights_holder" => { from: ["rightsHolder"] }, "rights_statement" => { from: ["rights"] }, "source" => { from: ["source"] }, "subject" => { from: ["subject"], parsed: true }, "title" => { from: ["title"] }, "resource_type" => { from: ["type"], parsed: true }, "remote_files" => { from: ["thumbnail_url"], parsed: true } }, # When empty, a default_field_mapping will be generated "Bulkrax::CsvParser" => {}, 'Bulkrax::BagitParser' => {}, 'Bulkrax::XmlParser' => {} } # Lambda to set the default field mapping conf.default_field_mapping = lambda do |field| return if field.blank? { field.to_s => { from: [field.to_s], split: false, parsed: Bulkrax::ApplicationMatcher.method_defined?("parse_#{field}"), if: nil, excluded: false } } end # Properties that should not be used in imports. They are reserved for use by Hyrax. conf.reserved_properties = %w[ create_date modified_date date_modified date_uploaded depositor arkivo_checksum has_model head label import_url on_behalf_of proxy_depositor owner state tail original_url relative_path ] # List of Questioning Authority properties that are controlled via YAML files in # the config/authorities/ directory. For example, the :rights_statement property # is controlled by the active terms in config/authorities/rights_statements.yml conf.qa_controlled_properties = %w[rights_statement license] end def api_definition @api_definition ||= ActiveSupport::HashWithIndifferentAccess.new( YAML.safe_load( ERB.new( File.read(Rails.root.join('config', 'bulkrax_api.yml')) ).result ) ) end DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON = ' | ' # Specify the delimiter for joining an attribute's multi-value array into a string. # # @note the specific delimiter should likely be present in the multi_value_element_split_on # expression. def multi_value_element_join_on @multi_value_element_join_on ||= DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON end DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON = /\s*[:;|]\s*/.freeze # @return [RegexClass] the regular express to use to "split" an attribute's values. If set to # `true` use the DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON. # # @note The "true" value is to preserve backwards compatibility. # @see DEFAULT_MULTI_VALUE_ELEMENT_JOIN_ON def multi_value_element_split_on if @multi_value_element_join_on.is_a?(TrueClass) DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON else @multi_value_element_split_on ||= DEFAULT_MULTI_VALUE_ELEMENT_SPLIT_ON end end # Responsible for stripping hidden characters from the given string. # # @param value [#to_s] # @return [String] with hidden characters removed # # @see https://github.com/samvera-labs/bulkrax/issues/688 def normalize_string(value) # Removing [Byte Order Mark (BOM)](https://en.wikipedia.org/wiki/Byte_order_mark) value.to_s.delete("\xEF\xBB\xBF") end def fallback_user_for_importer_exporter_processing return User.batch_user if defined?(Hyrax) && User.respond_to?(:batch_user) raise "We have no fallback user available for Bulkrax.fallback_user_for_importer_exporter_processing" end # This class confirms to the Active::Support.serialize interface. It's job is to ensure that we # don't have keys with the tricksy Byte Order Mark character. # # @see https://api.rubyonrails.org/classes/ActiveRecord/AttributeMethods/Serialization/ClassMethods.html#method-i-serialize class NormalizedJson def self.normalize_keys(hash) return hash unless hash.respond_to?(:each_pair) returning_value = {} hash.each_pair do |key, value| returning_value[Bulkrax.normalize_string(key)] = value end returning_value end # When we write the serialized data to the database, we "dump" the value into that database # column. def self.dump(value) JSON.dump(normalize_keys(value)) end # When we load the serialized data from the database, we pass the database's value into "load" # function. # # rubocop:disable Security/JSONLoad def self.load(string) normalize_keys(JSON.load(string)) end # rubocop:enable Security/JSONLoad end end # rubocop:disable Metrics/ModuleLength