require "fileutils"
module RocketJob
  module Jobs
    # Dirmon monitors folders for files matching the criteria specified in DirmonEntry
    #
    # * The first time Dirmon runs it gathers the names of files in the monitored
    #   folders.
    # * On completion Dirmon kicks off a new Dirmon job passing it the list
    #   of known files.
    # * On each subsequent Dirmon run it checks the size of each file against the
    #   previous list of known files, and only if the file size has not changed
    #   the corresponding job is started for that file.
    # * If the job implements #upload, that method is called
    #   and then the file is deleted, or moved to the archive_directory if supplied

    # * Otherwise, the file is moved to the supplied archive_directory (defaults to
    #   `_archive` in the same folder as the file itself. The absolute path and
    #   file name of the archived file is passed into the job as either
    #   `upload_file_name` or `full_file_name`.

    # Notes:
    # - Jobs that do not implement #upload _must_ have either `upload_file_name` or `full_file_name` as an attribute.
    #
    # With RocketJob Pro, the file is automatically uploaded into the job itself
    # using the job's #upload method, after which the file is archived or deleted
    # if no archive_directory was specified in the DirmonEntry.
    #
    # To start Dirmon for the first time
    #   RocketJob::Jobs::DirmonJob.create!
    #
    # If another DirmonJob instance is already queued or running, then the create
    # above will fail with:
    #   Validation failed: State Another instance of this job is already queued or running
    #
    # Or to start DirmonJob and ignore errors if already running
    #   RocketJob::Jobs::DirmonJob.create
    class DirmonJob < RocketJob::Job
      include RocketJob::Plugins::Cron

      # Runs every 5 minutes by default
      self.cron_schedule = "*/5 * * * * UTC"
      self.description   = "Directory Monitor"
      self.priority      = 30

      # Prevent multiple dirmon jobs from running at the same time by only
      # creating the next instance when this one finishes.
      self.cron_after_start = false

      # Hash[file_name, size]
      field :previous_file_names, type: Hash, default: {}, copy_on_restart: true

      # Checks the directories for new files, starting jobs if files have not changed since the last run.
      def perform
        check_directories
      end

      private

      # Iterate over each Dirmon Entry looking for new files
      # If a new file is found, it is not processed immediately, instead
      # it is passed to the next run of this job along with the file size.
      # If the file size has not changed, the Job is kicked off.
      def check_directories
        new_file_names = {}
        DirmonEntry.enabled.each do |dirmon_entry|
          check_entry(dirmon_entry, new_file_names)
        end
        self.previous_file_names = new_file_names
      end

      def check_entry(dirmon_entry, file_names)
        dirmon_entry.each do |path|
          # Skip file size checking since S3 files are only visible once completely uploaded.
          unless path.partial_files_visible?
            logger.info("File: #{path}. Starting: #{dirmon_entry.job_class_name}")
            dirmon_entry.later(path)
            next
          end

          # BSON Keys cannot contain periods
          key           = path.to_s.tr(".", "_")
          previous_size = previous_file_names[key]
          # Check every few minutes for a file size change before trying to process the file.
          size            = check_file(dirmon_entry, path, previous_size)
          file_names[key] = size if size
        end
      rescue StandardError => e
        logger.error("Dirmon Entry: #{dirmon_entry.id} failed. Moved to `failed` state to prevent processing again without manual intervention.", e)
        dirmon_entry.fail(worker_name, e)
        dirmon_entry.save(validate: false)
      end

      # Checks if a file should result in starting a job
      # Returns [Integer] file size, or nil if the file started a job
      def check_file(dirmon_entry, path, previous_size)
        size = path.size
        if previous_size && (previous_size == size)
          logger.info("File stabilized: #{path}. Starting: #{dirmon_entry.job_class_name}")
          dirmon_entry.later(path)
          nil
        else
          logger.info("Found file: #{path}. File size: #{size}")
          # Keep for the next run
          size
        end
      end
    end
  end
end