app/parsers/bulkrax/application_parser.rb in bulkrax-7.0.0 vs app/parsers/bulkrax/application_parser.rb in bulkrax-8.0.0
- old
+ new
@@ -1,8 +1,6 @@
# frozen_string_literal: true
-require 'zip'
-require 'marcel'
module Bulkrax
# An abstract class that establishes the API for Bulkrax's import and export parsing.
#
# @abstract Subclass the Bulkrax::ApplicationParser to create a parser that handles a specific format (e.g. CSV, Bagit, XML, etc).
@@ -230,39 +228,53 @@
# Builds the Arel condition used to select entries of one kind that are in one
# of the given statuses.
#
# For 'work' the type column must match neither 'collection' nor 'file_set';
# for any other type the column is matched against the camelized type name.
# The result is AND-ed with "status_message IN statuses".
#
# @param type [String] entry kind, e.g. 'work'; must respond to #camelize
# @param statuses [Array] values compared against the status_message column
# @return the combined Arel condition
# NOTE(review): no `%` wildcards are added to the patterns given to
# matches / does_not_match_all — confirm the stored type values are expected to
# match these bare strings.
def rebuild_entry_query(type, statuses)
type_col = Bulkrax::Entry.arel_table['type']
status_col = Bulkrax::Entry.arel_table['status_message']
- query = (type == 'work' ? type_col.not.matches(%w[collection file_set]) : type_col.matches(type.camelize))
+ query = (type == 'work' ? type_col.does_not_match_all(%w[collection file_set]) : type_col.matches(type.camelize))
query.and(status_col.in(statuses))
end
# Picks the wait applied before a delete-and-import job runs for an entry kind.
#
# @param type [String] entry kind
# @return 2.minutes for 'file_set', 1.minute for 'work', and 0 for anything else
def calculate_type_delay(type)
  case type
  when 'file_set' then 2.minutes
  when 'work' then 1.minute
  else 0
  end
end
# Returns the hash form of a record; create_entry_and_job passes the result to
# find_or_create_entry as the entry's raw metadata.
#
# @param record [#to_h] a single parsed record
# @return whatever record.to_h yields (presumably a Hash — confirm for custom record types)
+ def record_raw_metadata(record)
+ record.to_h
+ end
+
# Whether the record asks for its object to be deleted.
#
# Returns false when the record has no :delete key; otherwise the value stored
# under :delete is cast with ActiveModel::Type::Boolean. This replaces the
# earlier `current_record[:delete].present?` check in create_entry_and_job, so
# the outcome now depends on how the caster interprets the value rather than on
# mere presence (presumably strings such as 'false' no longer count as a
# delete request — confirm against the ActiveModel docs).
#
# @param record [#key?, #[]] a single parsed record
# @return the cast result, or false when the key is absent
+ def record_deleted?(record)
+ return false unless record.key?(:delete)
+ ActiveModel::Type::Boolean.new.cast(record[:delete])
+ end
+
# Whether the record asks to be removed and then imported again.
#
# Returns false when the record has no :remove_and_rerun key; otherwise the
# value stored under that key is cast with ActiveModel::Type::Boolean.
#
# @param record [#key?, #[]] a single parsed record
# @return the cast result, or false when the key is absent
+ def record_remove_and_rerun?(record)
+ return false unless record.key?(:remove_and_rerun)
+ ActiveModel::Type::Boolean.new.cast(record[:remove_and_rerun])
+ end
+
# Finds or creates the entry for one record, marks it 'Pending', and dispatches
# the job that matches what the record asks for.
#
# @param current_record [#[], #to_h] a single parsed record
# @param type [String] entry kind; interpolated into "#{type}_entry_class" and,
#   camelized, into the job class names
# @param identifier [Object, nil] defaults to current_record[source_identifier]
def create_entry_and_job(current_record, type, identifier = nil)
identifier ||= current_record[source_identifier]
new_entry = find_or_create_entry(send("#{type}_entry_class"),
identifier,
'Bulkrax::Importer',
- current_record.to_h)
+ record_raw_metadata(current_record))
new_entry.status_info('Pending', importer.current_run)
# A delete request wins over remove-and-rerun; a plain import is the fallback.
- if current_record[:delete].present?
+ if record_deleted?(current_record)
"Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
- elsif current_record[:remove_and_rerun].present? || remove_and_rerun
+ elsif record_remove_and_rerun?(current_record) || remove_and_rerun
# The delete-and-import job is scheduled with a per-type wait.
delay = calculate_type_delay(type)
"Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, new_entry, current_run)
else
# Unlike the two delete jobs above, the import job is given ids rather than the objects.
"Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
end
end
# Optional hook: define it in a parser that uses Browse Everything for file upload.
# The implementation here does nothing. Note that the signature changes in this
# diff from one argument (files) to two (files and importer).
- def retrieve_cloud_files(files); end
+ def retrieve_cloud_files(_files, _importer); end
# @param file [#path, #original_filename] the file object with the relevant data for the
# import.
def write_import_file(file)
path = File.join(path_for_import, file.original_filename)
@@ -380,10 +392,13 @@
importerexporter_id: importerexporter.id,
importerexporter_type: type,
identifier: identifier
)
entry.raw_metadata = raw_metadata
+ # Setting parsed_metadata specifically for the id so we can find the object via the
+ # id in a delete. This is likely to get clobbered in a regular import, which is fine.
+ entry.parsed_metadata = { id: raw_metadata['id'] } if raw_metadata&.key?('id')
entry.save!
entry
end
# @todo - review this method - is it ever used?
@@ -411,16 +426,25 @@
write_files
zip
end
# Extracts an archive into importer_unzip_path.
#
# Names ending in '.tar.gz' are handed to untar; anything else is opened as a
# zip. Zip entries whose target path already exists are left in place rather
# than overwritten.
#
# @param file_to_unzip [String] path of the archive; must respond to #end_with?
# NOTE(review): entry.name comes from the archive itself; if it can contain `..`
# segments the joined path may land outside importer_unzip_path — confirm the
# zip library or the caller guards against that.
# NOTE(review): this diff removes the file-level `require 'zip'`, so Zip must be
# loaded elsewhere — confirm.
def unzip(file_to_unzip)
+ return untar(file_to_unzip) if file_to_unzip.end_with?('.tar.gz')
+
Zip::File.open(file_to_unzip) do |zip_file|
zip_file.each do |entry|
entry_path = File.join(importer_unzip_path, entry.name)
# Make sure the entry's parent directory exists before extracting into it.
FileUtils.mkdir_p(File.dirname(entry_path))
zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
end
end
+ end
+
+ # Extracts a gzipped tarball into importer_unzip_path by running `tar -xzf`.
+ #
+ # @param file_to_untar [String] path of the .tar.gz archive
+ # @raise [RuntimeError] when tar exits unsuccessfully or cannot be run
+ def untar(file_to_untar)
+ # mkdir_p (which unzip also uses) creates missing parent directories and does
+ # nothing when the directory already exists; Dir.mkdir would raise if a parent
+ # were missing.
+ FileUtils.mkdir_p(importer_unzip_path)
+ # Argument-list form of system: tar is executed directly without a shell, so
+ # the paths need no escaping and Shellwords is not required.
+ result = system('tar', '-xzf', file_to_untar.to_s, '-C', importer_unzip_path.to_s)
+ raise "Failed to extract #{file_to_untar}" unless result
end
def zip
FileUtils.mkdir_p(exporter_export_zip_path)