# frozen_string_literal: true

require 'csv'
module Bulkrax
  class CsvParser < ApplicationParser # rubocop:disable Metrics/ClassLength
    include ErroredEntries
    include ExportBehavior
    attr_writer :collections, :file_sets, :works

    def self.export_supported?
      true
    end

    def records(_opts = {})
      return @records if @records.present?

      file_for_import = only_updates ? parser_fields['partial_import_file_path'] : import_file_path
      # data for entry does not need source_identifier for csv, because csvs are read sequentially and mapped after raw data is read.
      csv_data = entry_class.read_data(file_for_import)
      importer.parser_fields['total'] = csv_data.count
      importer.save

      @records = csv_data.map { |record_data| entry_class.data_for_entry(record_data, nil, self) }
    end

    def build_records
      @collections = []
      @works = []
      @file_sets = []

      if model_field_mappings.map { |mfm| mfm.to_sym.in?(records.first.keys) }.any?
        records.map do |r|
          model_field_mappings.map(&:to_sym).each do |model_mapping|
            next unless r.key?(model_mapping)

            if r[model_mapping].strip.casecmp('collection').zero?
              @collections << r
            elsif r[model_mapping].strip.casecmp('fileset').zero?
              @file_sets << r
            else
              @works << r
            end
          end
        end
        @collections = @collections.flatten.compact.uniq
        @file_sets = @file_sets.flatten.compact.uniq
        @works = @works.flatten.compact.uniq
      else # if no model is specified, assume all records are works
        @works = records.flatten.compact.uniq
      end

      true
    end

    def collections
      build_records if @collections.nil?
      @collections
    end

    def works
      build_records if @works.nil?
      @works
    end

    def file_sets
      build_records if @file_sets.nil?
      @file_sets
    end

    def collections_total
      collections.size
    end

    def works_total
      works.size
    end

    def file_sets_total
      file_sets.size
    end

    # We could use CsvEntry#fields_from_data(data) but that would mean re-reading the data
    def import_fields
      @import_fields ||= records.inject(:merge).keys.compact.uniq
    end

    def required_elements?(record)
      missing_elements(record).blank?
    end

    def missing_elements(record)
      keys_from_record = keys_without_numbers(record.reject { |_, v| v.blank? }.keys.compact.uniq.map(&:to_s))
      keys = []
      # Because we're persisting the mapping in the database, these are likely string keys.
      # However, there's no guarantee.  So, we need to ensure that by running stringify.
      importerexporter.mapping.stringify_keys.map do |k, v|
        Array.wrap(v['from']).each do |vf|
          keys << k if keys_from_record.include?(vf)
        end
      end
      required_elements.map(&:to_s) - keys.uniq.map(&:to_s)
    end

    def valid_import?
      compressed_record = records.flat_map(&:to_a).partition { |_, v| !v }.flatten(1).to_h
      error_alert = "Missing at least one required element, missing element(s) are: #{missing_elements(compressed_record).join(', ')}"
      raise StandardError, error_alert unless required_elements?(compressed_record)

      file_paths.is_a?(Array)
    rescue StandardError => e
      set_status_info(e)
      false
    end

    def create_collections
      create_objects(['collection'])
    end

    def create_works
      create_objects(['work'])
    end

    def create_file_sets
      create_objects(['file_set'])
    end

    def create_relationships
      create_objects(['relationship'])
    end

    def create_objects(types_array = nil)
      index = 0
      (types_array || %w[collection work file_set relationship]).each do |type|
        if type.eql?('relationship')
          ScheduleRelationshipsJob.set(wait: 5.minutes).perform_later(importer_id: importerexporter.id)
          next
        end
        send(type.pluralize).each do |current_record|
          next unless record_has_source_identifier(current_record, index)
          break if limit_reached?(limit, index)

          seen[current_record[source_identifier]] = true
          create_entry_and_job(current_record, type)
          increment_counters(index, "#{type}": true)
          index += 1
        end
        importer.record_status
      end
      true
    rescue StandardError => e
      set_status_info(e)
    end

    def create_entry_and_job(current_record, type)
      new_entry = find_or_create_entry(send("#{type}_entry_class"),
                                       current_record[source_identifier],
                                       'Bulkrax::Importer',
                                       current_record.to_h)
      if current_record[:delete].present?
        "Bulkrax::Delete#{type.camelize}Job".constantize.send(perform_method, new_entry, current_run)
      else
        "Bulkrax::Import#{type.camelize}Job".constantize.send(perform_method, new_entry.id, current_run.id)
      end
    end

    def write_partial_import_file(file)
      import_filename = import_file_path.split('/').last
      partial_import_filename = "#{File.basename(import_filename, '.csv')}_corrected_entries.csv"

      path = File.join(path_for_import, partial_import_filename)
      FileUtils.mv(
        file.path,
        path
      )
      path
    end

    def current_records_for_export
      @current_records_for_export ||= Bulkrax::ParserExportRecordSet.for(
        parser: self,
        export_from: importerexporter.export_from
      )
    end

    def create_new_entries
      # NOTE: The each method enforces the limit, as it can best optimize the underlying queries.
      current_records_for_export.each do |id, entry_class|
        new_entry = find_or_create_entry(entry_class, id, 'Bulkrax::Exporter')
        begin
          entry = ExportWorkJob.perform_now(new_entry.id, current_run.id)
        rescue => e
          Rails.logger.info("#{e.message} was detected during export")
        end

        self.headers |= entry.parsed_metadata.keys if entry
      end
    end
    alias create_from_collection create_new_entries
    alias create_from_importer create_new_entries
    alias create_from_worktype create_new_entries
    alias create_from_all create_new_entries

    def entry_class
      CsvEntry
    end
    alias work_entry_class entry_class

    def collection_entry_class
      CsvCollectionEntry
    end

    def file_set_entry_class
      CsvFileSetEntry
    end

    def valid_entry_types
      [collection_entry_class.to_s, file_set_entry_class.to_s, entry_class.to_s]
    end

    # TODO: figure out why using the version of this method that's in the bagit parser
    # breaks specs for the "if importer?" line
    def total
      @total =
        if importer?
          importer.parser_fields['total'] || 0
        elsif exporter?
          limit.to_i.zero? ? current_records_for_export.count : limit.to_i
        else
          0
        end

      return @total
    rescue StandardError
      @total = 0
    end

    def records_split_count
      1000
    end

    # @todo - investigate getting directory structure
    # @todo - investigate using perform_later, and having the importer check for
    #   DownloadCloudFileJob before it starts
    def retrieve_cloud_files(files)
      files_path = File.join(path_for_import, 'files')
      FileUtils.mkdir_p(files_path) unless File.exist?(files_path)
      files.each_pair do |_key, file|
        # fixes bug where auth headers do not get attached properly
        if file['auth_header'].present?
          file['headers'] ||= {}
          file['headers'].merge!(file['auth_header'])
        end
        # this only works for uniquely named files
        target_file = File.join(files_path, file['file_name'].tr(' ', '_'))
        # Now because we want the files in place before the importer runs
        # Problematic for a large upload
        Bulkrax::DownloadCloudFileJob.perform_now(file, target_file)
      end
      return nil
    end

    # export methods

    def write_files
      require 'open-uri'
      folder_count = 0
      # TODO: This is not performant as well; unclear how to address, but lower priority as of
      #       <2023-02-21 Tue>.
      sorted_entries = sort_entries(importerexporter.entries.uniq(&:identifier))
                       .select { |e| valid_entry_types.include?(e.type) }

      group_size = limit.to_i.zero? ? total : limit.to_i
      sorted_entries[0..group_size].in_groups_of(records_split_count, false) do |group|
        folder_count += 1

        CSV.open(setup_export_file(folder_count), "w", headers: export_headers, write_headers: true) do |csv|
          group.each do |entry|
            csv << entry.parsed_metadata
            next if importerexporter.metadata_only? || entry.type == 'Bulkrax::CsvCollectionEntry'

            store_files(entry.identifier, folder_count.to_s)
          end
        end
      end
    end

    def store_files(identifier, folder_count)
      record = ActiveFedora::Base.find(identifier)
      return unless record

      file_sets = record.file_set? ? Array.wrap(record) : record.file_sets
      file_sets << record.thumbnail if exporter.include_thumbnails && record.thumbnail.present? && record.work?
      file_sets.each do |fs|
        path = File.join(exporter_export_path, folder_count, 'files')
        FileUtils.mkdir_p(path) unless File.exist? path
        file = filename(fs)
        next if file.blank? || fs.original_file.blank?

        io = open(fs.original_file.uri)
        File.open(File.join(path, file), 'wb') do |f|
          f.write(io.read)
          f.close
        end
      end
    rescue Ldp::Gone
      return
    end

    def export_key_allowed(key)
      new_entry(entry_class, 'Bulkrax::Exporter').field_supported?(key) &&
        key != source_identifier.to_s
    end

    # All possible column names
    def export_headers
      headers = sort_headers(self.headers)

      # we don't want access_control_id exported and we want file at the end
      headers.delete('access_control_id') if headers.include?('access_control_id')

      # add the headers below at the beginning or end to maintain the preexisting export behavior
      headers.prepend('model')
      headers.prepend(source_identifier.to_s)
      headers.prepend('id')

      headers.uniq
    end

    def object_names
      return @object_names if @object_names

      @object_names = mapping.values.map { |value| value['object'] }
      @object_names.uniq!&.delete(nil)

      @object_names
    end

    def sort_entries(entries)
      # always export models in the same order: work, collection, file set
      entries.sort_by do |entry|
        case entry.type
        when 'Bulkrax::CsvCollectionEntry'
          '1'
        when 'Bulkrax::CsvFileSetEntry'
          '2'
        else
          '0'
        end
      end
    end

    def sort_headers(headers)
      # converting headers like creator_name_1 to creator_1_name so they get sorted by numerical order
      # while keeping objects grouped together
      headers.sort_by do |item|
        number = item.match(/\d+/)&.[](0) || 0.to_s
        sort_number = number.rjust(4, "0")
        object_prefix = object_names.detect { |o| item.match(/^#{o}/) } || item
        remainder = item.gsub(/^#{object_prefix}_/, '').gsub(/_#{number}/, '')
        "#{object_prefix}_#{sort_number}_#{remainder}"
      end
    end

    # in the parser as it is specific to the format
    def setup_export_file(folder_count)
      path = File.join(importerexporter.exporter_export_path, folder_count.to_s)
      FileUtils.mkdir_p(path) unless File.exist?(path)

      File.join(path, "export_#{importerexporter.export_source}_from_#{importerexporter.export_from}_#{folder_count}.csv")
    end

    # Retrieve file paths for [:file] mapping in records
    #  and check all listed files exist.
    def file_paths
      raise StandardError, 'No records were found' if records.blank?
      return [] if importerexporter.metadata_only?

      @file_paths ||= records.map do |r|
        file_mapping = Bulkrax.field_mappings.dig(self.class.to_s, 'file', :from)&.first&.to_sym || :file
        next if r[file_mapping].blank?

        r[file_mapping].split(Bulkrax.multi_value_element_split_on).map do |f|
          file = File.join(path_to_files, f.tr(' ', '_'))
          if File.exist?(file) # rubocop:disable Style/GuardClause
            file
          else
            raise "File #{file} does not exist"
          end
        end
      end.flatten.compact.uniq
    end

    # Retrieve the path where we expect to find the files
    def path_to_files(**args)
      filename = args.fetch(:filename, '')

      return @path_to_files if @path_to_files.present? && filename.blank?
      @path_to_files = File.join(
          zip? ? importer_unzip_path : File.dirname(import_file_path), 'files', filename
        )
    end

    private

    def unique_collection_identifier(collection_hash)
      entry_uid = collection_hash[source_identifier]
      entry_uid ||= if Bulkrax.fill_in_blank_source_identifiers.present?
                      Bulkrax.fill_in_blank_source_identifiers.call(self, records.find_index(collection_hash))
                    else
                      collection_hash[:title].split(Bulkrax.multi_value_element_split_on).first
                    end

      entry_uid
    end

    # Override to return the first CSV in the path, if a zip file is supplied
    # We expect a single CSV at the top level of the zip in the CSVParser
    # but we are willing to go look for it if need be
    def real_import_file_path
      return Dir["#{importer_unzip_path}/**/*.csv"].first if file? && zip?

      parser_fields['import_file_path']
    end
  end
end