app/parsers/bulkrax/application_parser.rb in bulkrax-3.2.0 vs app/parsers/bulkrax/application_parser.rb in bulkrax-3.3.0

- old
+ new

@@ -49,10 +49,14 @@
 
     def work_identifier
       @work_identifier ||= get_field_mapping_hash_for('source_identifier')&.keys&.first&.to_sym || :source
     end
 
+    def generated_metadata_mapping
+      @generated_metadata_mapping ||= 'generated'
+    end
+
     def related_parents_raw_mapping
       @related_parents_raw_mapping ||= get_field_mapping_hash_for('related_parents_field_mapping')&.values&.first&.[]('from')&.first
     end
 
     def related_parents_parsed_mapping
@@ -240,21 +244,54 @@
     end
 
     def write
       write_files
       zip
+      # uncomment next line to debug for faulty zipping during bagit export
+      bagit_zip_file_size_check if importerexporter.parser_klass.include?('Bagit')
     end
 
     def unzip(file_to_unzip)
-      WillowSword::ZipPackage.new(file_to_unzip, importer_unzip_path).unzip_file
+      Zip::File.open(file_to_unzip) do |zip_file|
+        zip_file.each do |entry|
+          entry_path = File.join(importer_unzip_path, entry.name)
+          FileUtils.mkdir_p(File.dirname(entry_path))
+          zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
+        end
+      end
     end
 
     def zip
       FileUtils.rm_rf(exporter_export_zip_path)
-      WillowSword::ZipPackage.new(exporter_export_path, exporter_export_zip_path).create_zip
+      Zip::File.open(exporter_export_zip_path, create: true) do |zip_file|
+        Dir["#{exporter_export_path}/**/**"].each do |file|
+          zip_file.add(file.sub("#{exporter_export_path}/", ''), file)
+        end
+      end
     end
 
+    # TODO: remove Entry::BagitZipError as well as this method when we're sure it's not needed
+    def bagit_zip_file_size_check
+      Zip::File.open(exporter_export_zip_path) do |zip_file|
+        zip_file.select { |entry| entry.name.include?('data/') && entry.file? }.each do |zipped_file|
+          Dir["#{exporter_export_path}/**/data/*"].select { |file| file.include?(zipped_file.name) }.each do |file|
+            begin
+              raise BagitZipError, "Invalid Bag, file size mismatch for #{file.sub("#{exporter_export_path}/", '')}" if File.size(file) != zipped_file.size
+            rescue BagitZipError => e
+              matched_entry_ids = importerexporter.entry_ids.select do |id|
+                Bulkrax::Entry.find(id).identifier.include?(zipped_file.name.split('/').first)
+              end
+              matched_entry_ids.each do |entry_id|
+                Bulkrax::Entry.find(entry_id).status_info(e)
+                status_info('Complete (with failures)')
+              end
+            end
+          end
+        end
+      end
+    end
+
     # Is this a file?
     def file?
       parser_fields&.[]('import_file_path') && File.file?(parser_fields['import_file_path'])
     end
 
@@ -270,10 +307,9 @@
 
     private
 
     def real_import_file_path
       return importer_unzip_path if file? && zip?
-
       parser_fields['import_file_path']
     end
   end
 end