app/models/bulkrax/csv_entry.rb in bulkrax-1.0.2 vs app/models/bulkrax/csv_entry.rb in bulkrax-2.0.0
- old
+ new
@@ -12,63 +12,70 @@
# there's a risk that this reads the whole file into memory and could cause a memory leak
def self.read_data(path)
raise StandardError, 'CSV path empty' if path.blank?
CSV.read(path,
- headers: true,
- header_converters: :symbol,
- encoding: 'utf-8')
+ headers: true,
+ header_converters: :symbol,
+ encoding: 'utf-8')
end
def self.data_for_entry(data, _source_id)
+ ActiveSupport::Deprecation.warn(
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
+ )
# If a multi-line CSV data is passed, grab the first row
data = data.first if data.is_a?(CSV::Table)
# model has to be separated so that it doesn't get mistranslated by to_h
raw_data = data.to_h
- raw_data[:model] = data[:model]
+ raw_data[:model] = data[:model] if data[:model].present?
# If the collection field mapping is not 'collection', add 'collection' - the parser needs it
raw_data[:collection] = raw_data[collection_field.to_sym] if raw_data.keys.include?(collection_field.to_sym) && collection_field != 'collection'
- # If the children field mapping is not 'children', add 'children' - the parser needs it
- raw_data[:children] = raw_data[collection_field.to_sym] if raw_data.keys.include?(children_field.to_sym) && children_field != 'children'
return raw_data
end
- def self.collection_field
- Bulkrax.collection_field_mapping[self.class.to_s] || 'collection'
- end
+ def build_metadata
+ raise StandardError, 'Record not found' if record.nil?
+ raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
- def self.children_field
- Bulkrax.parent_child_field_mapping[self.to_s] || 'children'
+ self.parsed_metadata = {}
+ add_identifier
+ add_metadata_for_model
+ add_visibility
+ add_ingested_metadata
+ add_rights_statement
+ add_collections
+ add_local
+
+ self.parsed_metadata
end
- def keys_without_numbers(keys)
- keys.map { |key| key_without_numbers(key) }
+ def add_identifier
+ self.parsed_metadata[work_identifier] = [record[source_identifier]]
end
- def key_without_numbers(key)
- key.gsub(/_\d+/, '').sub(/^\d+_/, '')
+ def add_metadata_for_model
+ if factory_class == Collection
+ add_collection_type_gid
+ else
+ add_file unless importerexporter.metadata_only?
+ add_admin_set_id
+ end
end
- def build_metadata
- raise StandardError, 'Record not found' if record.nil?
- raise StandardError, "Missing required elements, missing element(s) are: #{importerexporter.parser.missing_elements(keys_without_numbers(record.keys)).join(', ')}" unless importerexporter.parser.required_elements?(keys_without_numbers(record.keys))
+ def add_ingested_metadata
+ ActiveSupport::Deprecation.warn(
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
+ )
+ record.sort.each do |key, value|
+ next if self.parser.collection_field_mapping.to_s == key_without_numbers(key)
- self.parsed_metadata = {}
- self.parsed_metadata[work_identifier] = [record[source_identifier]]
- record.each do |key, value|
- next if key == 'collection'
-
index = key[/\d+/].to_i - 1 if key[/\d+/].to_i != 0
add_metadata(key_without_numbers(key), value, index)
end
- add_file
- add_visibility
- add_rights_statement
- add_admin_set_id
- add_collections
- add_local
- self.parsed_metadata
end
def add_file
self.parsed_metadata['file'] ||= []
if record['file']&.is_a?(String)
@@ -84,71 +91,112 @@
self.parsed_metadata = {}
self.parsed_metadata['id'] = hyrax_record.id
self.parsed_metadata[source_identifier] = hyrax_record.send(work_identifier)
self.parsed_metadata['model'] = hyrax_record.has_model.first
build_mapping_metadata
- self.parsed_metadata['collections'] = hyrax_record.member_of_collection_ids.join('; ')
- unless hyrax_record.is_a?(Collection)
- self.parsed_metadata['file'] = hyrax_record.file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact.join('; ')
+
+ # TODO: fix the "send" parameter in the conditional below
+ # currently it returns: "NoMethodError - undefined method 'bulkrax_identifier' for #<Collection:0x00007fbe6a3b4248>"
+ if mapping['collection']&.[]('join')
+ self.parsed_metadata['collection'] = hyrax_record.member_of_collection_ids.join('; ')
+ # self.parsed_metadata['collection'] = hyrax_record.member_of_collections.map { |c| c.send(work_identifier)&.first }.compact.uniq.join(';')
+ else
+ hyrax_record.member_of_collections.each_with_index do |collection, i|
+ self.parsed_metadata["collection_#{i + 1}"] = collection.id
+ # self.parsed_metadata["collection_#{i + 1}"] = collection.send(work_identifier)&.first
+ end
end
+
+ build_files unless hyrax_record.is_a?(Collection)
self.parsed_metadata
end
def build_mapping_metadata
mapping.each do |key, value|
next if Bulkrax.reserved_properties.include?(key) && !field_supported?(key)
next if key == "model"
+ next if value['excluded']
object_key = key if value.key?('object')
next unless hyrax_record.respond_to?(key.to_s) || object_key.present?
- data = object_key.present? ? hyrax_record.send(value['object']) : hyrax_record.send(key.to_s)
if object_key.present?
- next self.parsed_metadata[key] = '' if data.empty?
- data = data.first if data.is_a?(ActiveTriples::Relation)
+ build_object(value)
+ else
+ build_value(key, value)
+ end
+ end
+ end
- object_metadata(data, object_key)
- elsif data.is_a?(ActiveTriples::Relation)
- self.parsed_metadata[key] = data.map { |d| prepare_export_data(d) }.join('; ').to_s unless value[:excluded]
+ def build_object(value)
+ data = hyrax_record.send(value['object'])
+ return if data.empty?
+
+ data = data.to_a if data.is_a?(ActiveTriples::Relation)
+ object_metadata(Array.wrap(data))
+ end
+
+ def build_value(key, value)
+ data = hyrax_record.send(key.to_s)
+ if data.is_a?(ActiveTriples::Relation)
+ if value['join']
+ self.parsed_metadata[key_for_export(key)] = data.map { |d| prepare_export_data(d) }.join('; ').to_s
else
- self.parsed_metadata[key] = prepare_export_data(data)
+ data.each_with_index do |d, i|
+ self.parsed_metadata["#{key_for_export(key)}_#{i + 1}"] = prepare_export_data(d)
+ end
end
+ else
+ self.parsed_metadata[key_for_export(key)] = prepare_export_data(data)
end
end
+ # On export the key becomes the from and the from becomes the destination. It is the opposite of the import because we are moving data the opposite direction
+ # metadata that does not have a specific Bulkrax entry is mapped to the key name, as matching keys coming in are mapped by the csv parser automatically
+ def key_for_export(key)
+ clean_key = key_without_numbers(key)
+ unnumbered_key = mapping[clean_key] ? mapping[clean_key]['from'].first : clean_key
+ # Bring the number back if there is one
+ "#{unnumbered_key}#{key.sub(clean_key, '')}"
+ end
+
# Normalises one value for export: an ActiveTriples::Resource is written out
# as the string form of its URI, anything else is handed back unchanged.
def prepare_export_data(datum)
  return datum unless datum.is_a?(ActiveTriples::Resource)

  datum.to_uri.to_s
end
- def object_metadata(data, object_key)
- data = convert_to_hash(data)
+ def object_metadata(data)
+ data = data.map { |d| eval(d) }.flatten # rubocop:disable Security/Eval
data.each_with_index do |obj, index|
- next unless obj[object_key]
+ next if obj.nil?
+ # allow the object_key to be valid whether it's a string or symbol
+ obj = obj.with_indifferent_access
- next self.parsed_metadata["#{object_key}_#{index + 1}"] = prepare_export_data(obj[object_key]) unless obj[object_key].is_a?(Array)
-
- obj[object_key].each_with_index do |_nested_item, nested_index|
- self.parsed_metadata["#{object_key}_#{index + 1}_#{nested_index + 1}"] = prepare_export_data(obj[object_key][nested_index])
+ obj.each_key do |key|
+ if obj[key].is_a?(Array)
+ obj[key].each_with_index do |_nested_item, nested_index|
+ self.parsed_metadata["#{key_for_export(key)}_#{index + 1}_#{nested_index + 1}"] = prepare_export_data(obj[key][nested_index])
+ end
+ else
+ self.parsed_metadata["#{key_for_export(key)}_#{index + 1}"] = prepare_export_data(obj[key])
+ end
end
end
end
- def convert_to_hash(data)
- # converts data from `'[{}]'` to `[{}]`
- gsub_data = data.gsub(/\[{/, '{')
- .gsub(/}\]/, '}')
- .gsub('=>', ':')
- .gsub(/},\s?{/, "}},{{")
- .split("},{")
- gsub_data = [gsub_data] if gsub_data.is_a?(String)
-
- return gsub_data.map { |d| JSON.parse(d) }
+ def build_files
+ if mapping['file']&.[]('join')
+ self.parsed_metadata['file'] = hyrax_record.file_sets.map { |fs| filename(fs).to_s if filename(fs).present? }.compact.join('; ')
+ else
+ hyrax_record.file_sets.each_with_index do |fs, i|
+ self.parsed_metadata["file_#{i + 1}"] = filename(fs).to_s if filename(fs).present?
+ end
+ end
end
# In order for the existing exported hyrax_record, to be updated by a re-import
# we need a unique value in system_identifier
# add the existing hyrax_record id to system_identifier
@@ -165,21 +213,31 @@
# Class-level accessor that returns the Bulkrax::CsvMatcher constant.
def self.matcher_class
Bulkrax::CsvMatcher
end
+ def possible_collection_ids
+ ActiveSupport::Deprecation.warn(
+ 'Creating Collections using the collection_field_mapping will no longer be supported as of Bulkrax version 3.0.' \
+ ' Please configure Bulkrax to use related_parents_field_mapping and related_children_field_mapping instead.'
+ )
+ @possible_collection_ids ||= record.inject([]) do |memo, (key, value)|
+ memo += value.split(/\s*[:;|]\s*/) if self.class.collection_field.to_s == key_without_numbers(key) && value.present?
+ memo
+ end || []
+ end
+
def collections_created?
- return true if record[self.class.collection_field].blank?
- record[self.class.collection_field].split(/\s*[:;|]\s*/).length == self.collection_ids.length
+ possible_collection_ids.length == self.collection_ids.length
end
- def find_or_create_collection_ids
+ def find_collection_ids
return self.collection_ids if collections_created?
- valid_system_id(Collection)
- if record[self.class.collection_field].present?
- record[self.class.collection_field].split(/\s*[:;|]\s*/).each do |collection|
- c = find_collection(collection)
- self.collection_ids << c.id unless c.blank? || self.collection_ids.include?(c.id)
+ if possible_collection_ids.present?
+ possible_collection_ids.each do |collection_id|
+ c = find_collection(collection_id)
+ skip = c.blank? || self.collection_ids.include?(c.id)
+ self.collection_ids << c.id unless skip
end
end
self.collection_ids
end