app/parsers/bulkrax/application_parser.rb in bulkrax-7.0.0 vs app/parsers/bulkrax/application_parser.rb in bulkrax-8.0.0

- old
+ new

@@ -1,8 +1,6 @@
 # frozen_string_literal: true

-require 'zip'
-require 'marcel'
 module Bulkrax
   # An abstract class that establishes the API for Bulkrax's import and export parsing.
   #
   # @abstract Subclass the Bulkrax::ApplicationParser to create a parser that handles a specific format (e.g. CSV, Bagit, XML, etc).
@@ -230,39 +228,53 @@
     def rebuild_entry_query(type, statuses)
       type_col = Bulkrax::Entry.arel_table['type']
       status_col = Bulkrax::Entry.arel_table['status_message']

-      query = (type == 'work' ? type_col.not.matches(%w[collection file_set]) : type_col.matches(type.camelize))
+      query = (type == 'work' ? type_col.does_not_match_all(%w[collection file_set]) : type_col.matches(type.camelize))
       query.and(status_col.in(statuses))
     end

     def calculate_type_delay(type)
       return 2.minutes if type == 'file_set'
       return 1.minute if type == 'work'
       return 0
     end

+    def record_raw_metadata(record)
+      record.to_h
+    end
+
+    def record_deleted?(record)
+      return false unless record.key?(:delete)
+      ActiveModel::Type::Boolean.new.cast(record[:delete])
+    end
+
+    def record_remove_and_rerun?(record)
+      return false unless record.key?(:remove_and_rerun)
+      ActiveModel::Type::Boolean.new.cast(record[:remove_and_rerun])
+    end
+
     def create_entry_and_job(current_record, type, identifier = nil)
       identifier ||= current_record[source_identifier]
       new_entry = find_or_create_entry(send("#{type}_entry_class"),
                                        identifier,
                                        'Bulkrax::Importer',
-                                       current_record.to_h)
+                                       record_raw_metadata(current_record))
       new_entry.status_info('Pending', importer.current_run)
-      if current_record[:delete].present?
+      if record_deleted?(current_record)
         "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
-      elsif current_record[:remove_and_rerun].present? || remove_and_rerun
+      elsif record_remove_and_rerun?(current_record) || remove_and_rerun
         delay = calculate_type_delay(type)
         "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, new_entry, current_run)
       else
         "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
       end
     end

     # Optional, define if using browse everything for file upload
-    def retrieve_cloud_files(files); end
+    def retrieve_cloud_files(_files, _importer); end

     # @param file [#path, #original_filename] the file object that with the relevant data for the
     #   import.
     def write_import_file(file)
       path = File.join(path_for_import, file.original_filename)
@@ -380,10 +392,13 @@
         importerexporter_id: importerexporter.id,
         importerexporter_type: type,
         identifier: identifier
       )
       entry.raw_metadata = raw_metadata
+      # Setting parsed_metadata specifically for the id so we can find the object via the
+      # id in a delete. This is likely to get clobbered in a regular import, which is fine.
+      entry.parsed_metadata = { id: raw_metadata['id'] } if raw_metadata&.key?('id')
       entry.save!
       entry
     end

     # @todo - review this method - is it ever used?
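
Why the new record_deleted? and record_remove_and_rerun? helpers matter: the old code used .present?, so any non-blank cell in a delete or remove_and_rerun column (including the literal strings "false" or "0" coming out of a CSV) would trigger the destructive branch. Casting through ActiveModel::Type::Boolean treats those values as false. A minimal sketch of the difference, assuming a record parsed into a plain hash; the sample row and values are illustrative, not from the gem:

require 'active_model'
require 'active_support/core_ext/object/blank'

record = { source_identifier: 'work_1', delete: 'false' }  # hypothetical CSV row

# bulkrax-7.0.0 behaviour: any non-blank value counted as a delete request.
record[:delete].present?                      # => true

# bulkrax-8.0.0 behaviour: the cell is cast like a Rails boolean attribute.
ActiveModel::Type::Boolean.new.cast('false')  # => false
ActiveModel::Type::Boolean.new.cast('0')      # => false
ActiveModel::Type::Boolean.new.cast('true')   # => true
ActiveModel::Type::Boolean.new.cast('')       # => nil (falsey)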
@@ -411,16 +426,25 @@
       write_files
       zip
     end

     def unzip(file_to_unzip)
+      return untar(file_to_unzip) if file_to_unzip.end_with?('.tar.gz')
+
       Zip::File.open(file_to_unzip) do |zip_file|
         zip_file.each do |entry|
           entry_path = File.join(importer_unzip_path, entry.name)
           FileUtils.mkdir_p(File.dirname(entry_path))
           zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
         end
       end
+    end
+
+    def untar(file_to_untar)
+      Dir.mkdir(importer_unzip_path) unless File.directory?(importer_unzip_path)
+      command = "tar -xzf #{Shellwords.escape(file_to_untar)} -C #{Shellwords.escape(importer_unzip_path)}"
+      result = system(command)
+      raise "Failed to extract #{file_to_untar}" unless result
     end

     def zip
       FileUtils.mkdir_p(exporter_export_zip_path)
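
The unzip change routes any import file ending in .tar.gz to the new untar method, which shells out to tar with Shellwords-escaped paths instead of going through rubyzip. A standalone sketch of the same dispatch-and-extract pattern, with hypothetical archive and destination paths; rubyzip and a system tar binary are assumed to be available:

require 'fileutils'
require 'shellwords'
require 'zip' # rubyzip, used by the plain .zip branch

archive     = '/tmp/import/records.tar.gz'  # hypothetical paths
destination = '/tmp/import/unpacked'

if archive.end_with?('.tar.gz')
  FileUtils.mkdir_p(destination)
  # Escaping both paths keeps spaces or shell metacharacters in file names from
  # breaking, or injecting into, the tar command line.
  ok = system("tar -xzf #{Shellwords.escape(archive)} -C #{Shellwords.escape(destination)}")
  raise "Failed to extract #{archive}" unless ok
else
  Zip::File.open(archive) do |zip_file|
    zip_file.each do |entry|
      entry_path = File.join(destination, entry.name)
      FileUtils.mkdir_p(File.dirname(entry_path))
      zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
    end
  end
end

Because the gem's untar raises when system returns false, a failed extraction surfaces as an importer error rather than silently leaving an empty unzip directory.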