app/parsers/bulkrax/application_parser.rb in bulkrax-3.2.0 vs app/parsers/bulkrax/application_parser.rb in bulkrax-3.3.0
- old
+ new
@@ -49,10 +49,14 @@
def work_identifier
@work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
end
+ def generated_metadata_mapping
+ @generated_metadata_mapping ||= 'generated'
+ end
+
def related_parents_raw_mapping
@related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
end
def related_parents_parsed_mapping
@@ -240,21 +244,54 @@
end
def write
write_files
zip
+ # uncomment next line to debug for faulty zipping during bagit export
+ bagit_zip_file_size_check if importerexporter.parser_klass.include?('Bagit')
end
def unzip(file_to_unzip)
- WillowSword::ZipPackage.new(file_to_unzip, importer_unzip_path).unzip_file
+ Zip::File.open(file_to_unzip) do |zip_file|
+ zip_file.each do |entry|
+ entry_path = File.join(importer_unzip_path, entry.name)
+ FileUtils.mkdir_p(File.dirname(entry_path))
+ zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
+ end
+ end
end
def zip
FileUtils.rm_rf(exporter_export_zip_path)
- WillowSword::ZipPackage.new(exporter_export_path, exporter_export_zip_path).create_zip
+ Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
+ Dir["#{exporter_export_path}/**/**"].each do |file|
+ zip_file.add(file.sub("#{exporter_export_path}/", ''), file)
+ end
+ end
end
+ # TODO: remove Entry::BagitZipError as well as this method when we're sure it's not needed
+ def bagit_zip_file_size_check
+ Zip::File.open(exporter_export_zip_path) do |zip_file|
+ zip_file.select { |entry| entry.name.include?('data/') && entry.file? }.each do |zipped_file|
+ Dir["#{exporter_export_path}/**/data/*"].select { |file| file.include?(zipped_file.name) }.each do |file|
+ begin
+ raise BagitZipError, "Invalid Bag, file size mismatch for #{file.sub("#{exporter_export_path}/", '')}" if File.size(file) != zipped_file.size
+ rescue BagitZipError => e
+ matched_entry_ids = importerexporter.entry_ids.select do |id|
+ Bulkrax::Entry.find(id).identifier.include?(zipped_file.name.split('/').first)
+ end
+ matched_entry_ids.each do |entry_id|
+ Bulkrax::Entry.find(entry_id).status_info(e)
+ status_info('Complete (with failures)')
+ end
+ end
+ end
+ end
+ end
+ end
+
# Is this a file?
def file?
parser_fields&.[]('import_file_path') && File.file?(parser_fields['import_file_path'])
end
@@ -270,10 +307,9 @@
private
def real_import_file_path
return importer_unzip_path if file? && zip?
-
parser_fields['import_file_path']
end
end
end