application_parser.rb in bulkrax-8.0.0

- old
+ new

@@ -1,8 +1,6 @@
 # frozen_string_literal: true
-require 'zip'
-require 'marcel'
 
 module Bulkrax
   # An abstract class that establishes the API for Bulkrax's import and export parsing.
   #
   # @abstract Subclass the Bulkrax::ApplicationParser to create a parser that handles a specific format (e.g. CSV, Bagit, XML, etc).
@@ -230,39 +228,53 @@
 
     # Builds the Arel condition used to select entries of a given type whose
     # status message is one of +statuses+.
     #
     # @param type [String] 'work' selects entries whose type column matches neither
     #   'collection' nor 'file_set'; any other value is camelized and matched
     #   against the type column.
     # @param statuses [Array] status messages to include (presumably strings —
     #   confirm against callers)
     # @return the type condition AND-ed with the status condition
     #
     # NOTE(review): none of the patterns carry SQL wildcards, so these look like
     # whole-value comparisons — confirm that is what the stored type values need.
     def rebuild_entry_query(type, statuses)
       type_col = Bulkrax::Entry.arel_table['type']
       status_col = Bulkrax::Entry.arel_table['status_message']
 
-      query = (type == 'work' ? type_col.not.matches(%w[collection file_set]) : type_col.matches(type.camelize))
+      query = (type == 'work' ? type_col.does_not_match_all(%w[collection file_set]) : type_col.matches(type.camelize))
       query.and(status_col.in(statuses))
     end
 
     # Picks the wait applied to a delete-and-import job for the given entry type.
     #
     # @param type [String] the entry type, e.g. 'file_set' or 'work'
     # @return 2 minutes for 'file_set', 1 minute for 'work', otherwise 0
     def calculate_type_delay(type)
       if type == 'file_set'
         2.minutes
       elsif type == 'work'
         1.minute
       else
         0
       end
     end
 
+    def record_raw_metadata(record)
+      record.to_h
+    end
+
+    def record_deleted?(record)
+      return false unless record.key?(:delete)
+      ActiveModel::Type::Boolean.new.cast(record[:delete])
+    end
+
+    def record_remove_and_rerun?(record)
+      return false unless record.key?(:remove_and_rerun)
+      ActiveModel::Type::Boolean.new.cast(record[:remove_and_rerun])
+    end
+
     # Finds or creates the entry for +current_record+, marks it 'Pending' for the
     # importer's current run, and dispatches the job that matches the record.
     #
     # @param current_record [#[]] the record being imported
     # @param type [String] entry type; used to look up "<type>_entry_class" and to
     #   build the job class names
     # @param identifier [Object, nil] defaults to current_record[source_identifier]
     # @return the result of the job dispatch call
     def create_entry_and_job(current_record, type, identifier = nil)
       identifier ||= current_record[source_identifier]
       new_entry = find_or_create_entry(send("#{type}_entry_class"),
                                        identifier,
                                        'Bulkrax::Importer',
-                                       current_record.to_h)
+                                       record_raw_metadata(current_record))
       new_entry.status_info('Pending', importer.current_run)
       # A delete request is checked first, then remove-and-rerun (per record or
       # parser-wide); anything else is a plain import.
-      if current_record[:delete].present?
+      if record_deleted?(current_record)
         "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
-      elsif current_record[:remove_and_rerun].present? || remove_and_rerun
+      elsif record_remove_and_rerun?(current_record) || remove_and_rerun
         delay = calculate_type_delay(type)
         "Bulkrax::DeleteAndImport#{type.camelize}Job".constantize.set(wait: delay).send(perform_method, new_entry, current_run)
       else
         # Unlike the two delete jobs, the import job is handed ids rather than objects.
         "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
       end
     end
 
     # Optional hook: define it in a parser that uses browse everything for file
     # upload. This default implementation does nothing and returns nil.
-    def retrieve_cloud_files(files); end
+    def retrieve_cloud_files(_files, _importer); end
 
     # @param file [#path, #original_filename] the file object with the relevant data for the
     #        import.
     def write_import_file(file)
       path = File.join(path_for_import, file.original_filename)
@@ -380,10 +392,13 @@
         importerexporter_id: importerexporter.id,
         importerexporter_type: type,
         identifier: identifier
       )
       entry.raw_metadata = raw_metadata
+      # Setting parsed_metadata specifically for the id so we can find the object via the
+      # id in a delete.  This is likely to get clobbered in a regular import, which is fine.
+      entry.parsed_metadata = { id: raw_metadata['id'] } if raw_metadata&.key?('id')
       entry.save!
       entry
     end
 
     # @todo - review this method - is it ever used?
@@ -411,16 +426,25 @@
       write_files
       zip
     end
 
     def unzip(file_to_unzip)
+      return untar(file_to_unzip) if file_to_unzip.end_with?('.tar.gz')
+
       Zip::File.open(file_to_unzip) do |zip_file|
         zip_file.each do |entry|
           entry_path = File.join(importer_unzip_path, entry.name)
           FileUtils.mkdir_p(File.dirname(entry_path))
           zip_file.extract(entry, entry_path) unless File.exist?(entry_path)
         end
       end
+    end
+
+    # Extracts a gzipped tarball into importer_unzip_path using the system tar.
+    #
+    # @param file_to_untar [String] path to the .tar.gz archive
+    # @raise [RuntimeError] if tar cannot be run or exits with a non-zero status
+    def untar(file_to_untar)
+      # mkdir_p also creates missing parent directories, which Dir.mkdir would not.
+      FileUtils.mkdir_p(importer_unzip_path)
+      # Passing each argument separately bypasses the shell, so no escaping is needed.
+      extracted = system('tar', '-xzf', file_to_untar.to_s, '-C', importer_unzip_path.to_s)
+      raise "Failed to extract #{file_to_untar}" unless extracted
     end
 
     def zip
       FileUtils.mkdir_p(exporter_export_zip_path)