app/models/bulkrax/csv_entry.rb in bulkrax-5.1.0 vs app/models/bulkrax/csv_entry.rb in bulkrax-5.2.0
- old
+ new
@@ -11,19 +11,53 @@
# Lists the distinct, non-nil header names found in the parsed CSV data.
#
# @param data [#headers] parsed CSV data
# @return [Array] flattened headers with nils and duplicates removed
def self.fields_from_data(data)
  flattened_headers = data.headers.flatten
  present_headers = flattened_headers.compact
  present_headers.uniq
end
+ class_attribute(:csv_read_data_options, default: {}) # extra options merged over the CSV.read defaults in read_data
+
# Reads the CSV file at `path` with headers enabled and header names converted to symbols.
# In the new (+) version the default options can be overridden through `csv_read_data_options`,
# and the result is wrapped in an instance of `csv_wrapper_class`.
#
# NOTE: there's a risk that this reads the whole file into memory, which could exhaust memory
# for very large CSVs.
#
# @param path location of the CSV file; must not be blank
# @raise [StandardError] when `path` is blank
def self.read_data(path)
raise StandardError, 'CSV path empty' if path.blank?
- CSV.read(path,
+ options = {
headers: true,
header_converters: ->(h) { h.to_sym },
- encoding: 'utf-8')
+ encoding: 'utf-8'
+ }.merge(csv_read_data_options)
+
+ results = CSV.read(path, **options)
+ csv_wrapper_class.new(results)
end
+ # The purpose of this class is to reject empty lines. This causes lots of grief in importing.
+ # But why not use {CSV.read}'s `skip_lines` option? Because for some CSVs, it will never finish
+ # reading the file.
+ #
+ # There is a spec that demonstrates this approach works.
+ class CsvWrapper
+ include Enumerable
+ def initialize(original)
+ @original = original
+ end
+
+ delegate :headers, to: :@original
+
+ def each
+ @original.each do |row|
+ next if all_fields_are_empty_for(row: row)
+ yield(row)
+ end
+ end
+
+ private
+
+ def all_fields_are_empty_for(row:)
+ row.to_hash.values.all?(&:blank?)
+ end
+ end
+ class_attribute :csv_wrapper_class, default: CsvWrapper
+
def self.data_for_entry(data, _source_id, parser)
# If a multi-line CSV data is passed, grab the first row
data = data.first if data.is_a?(CSV::Table)
# model has to be separated so that it doesn't get mistranslated by to_h
raw_data = data.to_h
@@ -33,15 +67,11 @@
raw_data[:parents] = raw_data[parent_field(parser).to_sym] if raw_data.keys.include?(parent_field(parser).to_sym) && parent_field(parser) != 'parents'
return raw_data
end
def build_metadata
- raise StandardError, 'Record not found' if record.nil?
- unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
- raise StandardError,
-"Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}"
- end
+ validate_record
self.parsed_metadata = {}
add_identifier
establish_factory_class
add_ingested_metadata
@@ -54,10 +84,16 @@
add_local
self.parsed_metadata
end
+ def validate_record
+ raise StandardError, 'Record not found' if record.nil?
+ raise StandardError, "Missing required elements, missing element(s) are: "\
+"#{importerexporter.parser.missing_elements(record).join(', ')}" unless importerexporter.parser.required_elements?(record)
+ end
+
# Stores the record's source identifier, wrapped in an Array, under the work identifier key
# of the parsed metadata.
def add_identifier
  metadata = parsed_metadata
  target_field = work_identifier
  metadata[target_field] = [record[source_identifier]]
end
def establish_factory_class
@@ -65,13 +101,14 @@
add_metadata('model', record[key]) if record.key?(key)
end
end
def add_metadata_for_model
- if factory_class == Collection
- add_collection_type_gid
- elsif factory_class == FileSet
+ if defined?(::Collection) && factory_class == ::Collection
+ add_collection_type_gid if defined?(::Hyrax)
+ # add any additional collection metadata methods here
+ elsif factory_class == Bulkrax.file_model_class
validate_presence_of_filename!
add_path_to_file
validate_presence_of_parent!
else
add_file unless importerexporter.metadata_only?
@@ -104,11 +141,11 @@
def build_export_metadata
self.parsed_metadata = {}
build_system_metadata
- build_files_metadata unless hyrax_record.is_a?(Collection)
+ build_files_metadata if defined?(Collection) && !hyrax_record.is_a?(Collection)
build_relationship_metadata
build_mapping_metadata
self.save!
self.parsed_metadata
@@ -155,38 +192,62 @@
handle_join_on_export(relationship_key, values, mapping[related_parents_parsed_mapping]['join'].present?)
end
end
+ # The purpose of this helper module is to make easier the testing of the rather complex
+ # switching logic for determining the method we use for building the value.
+ module AttributeBuilderMethod
+ # @param key [Symbol]
+ # @param value [Hash<String, Object>]
+ # @param entry [Bulkrax::Entry]
+ #
+ # @return [NilClass] when we won't be processing this field
+ # @return [Symbol] (either :build_value or :build_object)
+ def self.for(key:, value:, entry:)
+ return if key == 'model'
+ return if key == 'file'
+ return if key == entry.related_parents_parsed_mapping
+ return if key == entry.related_children_parsed_mapping
+ return if value['excluded'] || value[:excluded]
+ return if Bulkrax.reserved_properties.include?(key) && !entry.field_supported?(key)
+
+ object_key = key if value.key?('object') || value.key?(:object)
+ return unless entry.hyrax_record.respond_to?(key.to_s) || object_key.present?
+
+ models_to_skip = Array.wrap(value['skip_object_for_model_names'] || value[:skip_object_for_model_names] || [])
+
+ return :build_value if models_to_skip.detect { |model| entry.factory_class.model_name.name == model }
+ return :build_object if object_key.present?
+
+ :build_value
+ end
+ end
+
# Walks every key/value pair of the field mapping and builds its export value.
# In the new (+) version AttributeBuilderMethod.for either names the builder to use
# (:build_value or :build_object) or returns nil to skip the field; the chosen method is then
# invoked through `send` with the mapping key and value.
def build_mapping_metadata
mapping = fetch_field_mapping
mapping.each do |key, value|
- # these keys are handled by other methods
- next if ['model', 'file', related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
- next if value['excluded']
- next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
+ method_name = AttributeBuilderMethod.for(key: key, value: value, entry: self)
+ next unless method_name
- object_key = key if value.key?('object')
- next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
-
- if object_key.present?
- build_object(value)
- else
- build_value(key, value)
- end
+ send(method_name, key, value)
end
end
# Exports an object-style field: reads the attribute named by value['object'] from hyrax_record
# and passes the result, wrapped in an Array, to object_metadata. Nothing is exported when the
# record does not respond to that attribute (guard added in the new (+) version) or when the
# data is empty.
#
# NOTE(review): AttributeBuilderMethod.for also accepts a Symbol :object key, but only
# value['object'] is read here; confirm that mappings always use String keys (or a Hash with
# indifferent access), otherwise the attribute name would be nil.
- def build_object(value)
+ def build_object(_key, value)
+ return unless hyrax_record.respond_to?(value['object'])
+
data = hyrax_record.send(value['object'])
return if data.empty?
data = data.to_a if data.is_a?(ActiveTriples::Relation)
object_metadata(Array.wrap(data))
end
def build_value(key, value)
+ return unless hyrax_record.respond_to?(key.to_s)
+
data = hyrax_record.send(key.to_s)
if data.is_a?(ActiveTriples::Relation)
if value['join']
self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join(Bulkrax.multi_value_element_join_on).to_s
else
@@ -215,9 +276,17 @@
datum
end
end
def object_metadata(data)
+ # NOTE: What is `d` in this case:
+ #
+ # "[{\"single_object_first_name\"=>\"Fake\", \"single_object_last_name\"=>\"Fakerson\", \"single_object_position\"=>\"Leader, Jester, Queen\", \"single_object_language\"=>\"english\"}]"
+ #
+ # The above is a stringified version of a Ruby Array of Hashes. Using eval is a very bad idea
+ # as it will execute the value of `d` within the full Ruby interpreter context.
+ #
+ # TODO: Would it be possible to store this as a non-string? Maybe the actual Ruby Array and Hash?
data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval
data.each_with_index do |obj, index|
next if obj.nil?
# allow the object_key to be valid whether it's a string or symbol