require 'uri'
require 'tmpdir'
require 'browse_everything/retriever'

# Given a FileSet that has an import_url property,
# download that file and put it into Fedora.
# Called by AttachFilesToWorkJob (when files are uploaded to S3)
# and by CreateWithRemoteFilesActor when files are located in some other service.
class ImportUrlJob < Hyrax::ApplicationJob
  queue_as Hyrax.config.ingest_queue_name

  # Tell the operation about this job before it is placed on the queue.
  before_enqueue do |job|
    operation.pending_job(job)
  end

  # Retrieves the operation for the job.
  #
  # #perform receives the operation as its second positional argument, so that
  # slot is consulted first. When the slot is empty or holds a Hash (i.e. the
  # job was enqueued with hash arguments), fall back to merging the argument
  # hashes and looking up the :operation key.
  # @return [Hyrax::BatchCreateOperation] the operation passed to the job
  def operation
    positional = arguments[1]
    return positional unless positional.nil? || positional.is_a?(Hash)

    arguments.reduce(:merge).fetch(:operation)
  end

  # Downloads the file at file_set.import_url and attaches it to the FileSet,
  # recording the outcome on the operation.
  # @param [FileSet] file_set
  # @param [Hyrax::BatchCreateOperation] operation
  # @param [Hash] headers extra entries merged into the retriever's download spec
  def perform(file_set, operation, headers = {})
    operation.performing!
    user = User.find_by_user_key(file_set.depositor)
    uri = URI(file_set.import_url)
    # @todo Use Hydra::Works::AddExternalFileToFileSet instead of manually
    #       copying the file here. This will be gnarly.
    copy_remote_file(uri, headers) do |f|
      # reload the FileSet once the data is copied since this is a long running task
      file_set.reload

      # FileSetActor operates synchronously so that this tempfile is available.
      # If asynchronous, the job might be invoked on a machine that did not have this temp file on its file system!
      # NOTE: The return status may be successful even if the content never attaches.
      if Hyrax::Actors::FileSetActor.new(file_set, user).create_content(f, from_url: true)
        operation.success!
      else
        # send message to user on download failure
        Hyrax.config.callback.run(:after_import_url_failure, file_set, user)
        operation.fail!(file_set.errors.full_messages.join(' '))
      end
    end
  end

  private

    # Download file from uri, yields a block with a file in a temporary directory.
    # It is important that the file on disk has the same file name as the URL,
    # because when the file is added into Fedora the file name will get persisted in the
    # metadata.
    #
    # The temporary directory, and the downloaded file inside it, are removed
    # when this method returns or raises, so the block must finish using the
    # file before it returns.
    # @param uri [URI] the uri of the file to download
    # @param headers [Hash] extra entries merged into the retriever's download spec
    # @yield [File] the downloaded file, rewound to its start
    def copy_remote_file(uri, headers = {})
      filename = File.basename(uri.path)

      # The block form of Dir.mktmpdir deletes the directory on exit; the
      # non-block form left one directory (and one full download) behind per job.
      Dir.mktmpdir do |dir|
        path = File.join(dir, filename)
        Rails.logger.debug("ImportUrlJob: Copying <#{uri}> to #{dir}")

        # NOTE(review): the handle is opened write-only ('wb'), so consumers
        # presumably reopen the file via its path rather than read from it - confirm.
        File.open(path, 'wb') do |f|
          retriever = BrowseEverything::Retriever.new
          uri_spec = { 'url' => uri }.merge(headers)
          retriever.retrieve(uri_spec) do |chunk|
            f.write(chunk)
          end
          f.rewind
          yield f
        end
        Rails.logger.debug("ImportUrlJob: Closing #{path}")
      end
    end
end