app/models/bulkrax/csv_entry.rb in bulkrax-5.1.0 vs app/models/bulkrax/csv_entry.rb in bulkrax-5.2.0

- old
+ new

@@ -11,19 +11,53 @@

     def self.fields_from_data(data)
       data.headers.flatten.compact.uniq
     end

+    class_attribute(:csv_read_data_options, default: {})
+
     # there's a risk that this reads the whole file into memory and could cause a memory leak
     def self.read_data(path)
       raise StandardError, 'CSV path empty' if path.blank?
-      CSV.read(path,
+      options = {
         headers: true,
         header_converters: ->(h) { h.to_sym },
-        encoding: 'utf-8')
+        encoding: 'utf-8'
+      }.merge(csv_read_data_options)
+
+      results = CSV.read(path, **options)
+      csv_wrapper_class.new(results)
     end

+    # The purpose of this class is to reject empty lines. This causes lots of grief in importing.
+    # But why not use {CSV.read}'s `skip_lines` option? Because for some CSVs, it will never finish
+    # reading the file.
+    #
+    # There is a spec that demonstrates this approach works.
+    class CsvWrapper
+      include Enumerable
+      def initialize(original)
+        @original = original
+      end
+
+      delegate :headers, to: :@original
+
+      def each
+        @original.each do |row|
+          next if all_fields_are_empty_for(row: row)
+          yield(row)
+        end
+      end
+
+      private
+
+      def all_fields_are_empty_for(row:)
+        row.to_hash.values.all?(&:blank?)
+      end
+    end
+    class_attribute :csv_wrapper_class, default: CsvWrapper
+
     def self.data_for_entry(data, _source_id, parser)
       # If a multi-line CSV data is passed, grab the first row
       data = data.first if data.is_a?(CSV::Table)
       # model has to be separated so that it doesn't get mistranslated by to_h
       raw_data = data.to_h
@@ -33,15 +67,11 @@
       raw_data[:parents] = raw_data[parent_field(parser).to_sym] if raw_data.keys.include?(parent_field(parser).to_sym) && parent_field(parser) != 'parents'
       return raw_data
     end

     def build_metadata
-      raise StandardError, 'Record not found' if record.nil?
-      unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
-        raise StandardError,
-"Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}"
-      end
+      validate_record

       self.parsed_metadata = {}
       add_identifier
       establish_factory_class
       add_ingested_metadata
@@ -54,10 +84,16 @@
       add_local

       self.parsed_metadata
     end

+    def validate_record
+      raise StandardError, 'Record not found' if record.nil?
+      raise StandardError, "Missing required elements, missing element(s) are: "\
+"#{importerexporter.parser.missing_elements(record).join(', ')}" unless importerexporter.parser.required_elements?(record)
+    end
+
     def add_identifier
       self.parsed_metadata[work_identifier] = [record[source_identifier]]
     end

     def establish_factory_class
@@ -65,13 +101,14 @@
         add_metadata('model', record[key]) if record.key?(key)
       end
     end

     def add_metadata_for_model
-      if factory_class == Collection
-        add_collection_type_gid
-      elsif factory_class == FileSet
+      if defined?(::Collection) && factory_class == ::Collection
+        add_collection_type_gid if defined?(::Hyrax)
+        # add any additional collection metadata methods here
+      elsif factory_class == Bulkrax.file_model_class
         validate_presence_of_filename!
         add_path_to_file
         validate_presence_of_parent!
       else
         add_file unless importerexporter.metadata_only?
@@ -104,11 +141,11 @@

     def build_export_metadata
       self.parsed_metadata = {}

       build_system_metadata
-      build_files_metadata unless hyrax_record.is_a?(Collection)
+      build_files_metadata if defined?(Collection) && !hyrax_record.is_a?(Collection)
       build_relationship_metadata
       build_mapping_metadata
       self.save!

       self.parsed_metadata
@@ -155,38 +192,62 @@

         handle_join_on_export(relationship_key, values, mapping[related_parents_parsed_mapping]['join'].present?)
       end
     end

+    # The purpose of this helper module is to make easier the testing of the rather complex
+    # switching logic for determining the method we use for building the value.
+    module AttributeBuilderMethod
+      # @param key [Symbol]
+      # @param value [Hash<String, Object>]
+      # @param entry [Bulkrax::Entry]
+      #
+      # @return [NilClass] when we won't be processing this field
+      # @return [Symbol] (either :build_value or :build_object)
+      def self.for(key:, value:, entry:)
+        return if key == 'model'
+        return if key == 'file'
+        return if key == entry.related_parents_parsed_mapping
+        return if key == entry.related_children_parsed_mapping
+        return if value['excluded'] || value[:excluded]
+        return if Bulkrax.reserved_properties.include?(key) && !entry.field_supported?(key)
+
+        object_key = key if value.key?('object') || value.key?(:object)
+        return unless entry.hyrax_record.respond_to?(key.to_s) || object_key.present?
+
+        models_to_skip = Array.wrap(value['skip_object_for_model_names'] || value[:skip_object_for_model_names] || [])
+
+        return :build_value if models_to_skip.detect { |model| entry.factory_class.model_name.name == model }
+        return :build_object if object_key.present?
+
+        :build_value
+      end
+    end
+
     def build_mapping_metadata
       mapping = fetch_field_mapping
       mapping.each do |key, value|
-        # these keys are handled by other methods
-        next if ['model', 'file', related_parents_parsed_mapping, related_children_parsed_mapping].include?(key)
-        next if value['excluded']
-        next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
+        method_name = AttributeBuilderMethod.for(key: key, value: value, entry: self)
+        next unless method_name

-        object_key = key if value.key?('object')
-        next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
-
-        if object_key.present?
-          build_object(value)
-        else
-          build_value(key, value)
-        end
+        send(method_name, key, value)
       end
     end

-    def build_object(value)
+    def build_object(_key, value)
+      return unless hyrax_record.respond_to?(value['object'])
+
       data = hyrax_record.send(value['object'])
       return if data.empty?

       data = data.to_a if data.is_a?(ActiveTriples::Relation)
       object_metadata(Array.wrap(data))
     end

     def build_value(key, value)
+      return unless hyrax_record.respond_to?(key.to_s)
+
       data = hyrax_record.send(key.to_s)
       if data.is_a?(ActiveTriples::Relation)
         if value['join']
           self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join(Bulkrax.multi_value_element_join_on).to_s
         else
@@ -215,9 +276,17 @@
         datum
       end
     end

     def object_metadata(data)
+      # NOTE: What is `d` in this case:
+      #
+      # "[{\"single_object_first_name\"=>\"Fake\", \"single_object_last_name\"=>\"Fakerson\", \"single_object_position\"=>\"Leader, Jester, Queen\", \"single_object_language\"=>\"english\"}]"
+      #
+      # The above is a stringified version of a Ruby string. Using eval is a very bad idea as it
+      # will execute the value of `d` within the full Ruby interpreter context.
+      #
+      # TODO: Would it be possible to store this as a non-string? Maybe the actual Ruby Array and Hash?
       data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval

       data.each_with_index do |obj, index|
         next if obj.nil?
         # allow the object_key to be valid whether it's a string or symbol