Sha256: 2d4d1286ec81c9cf0fd7106097d1c933255f7ad240414f29f40f25a04f388537

Contents?: true

Size: 1.71 KB

Versions: 27

Compression:

Stored size: 1.71 KB

Contents

module EasyML
  # Background job that trains a single EasyML::Model.
  #
  # While training runs, SIGTERM and SIGINT are trapped so that an interrupted
  # run is recorded as failed and the model's `is_training` flag is cleared.
  # The handlers that were installed before the job started are put back once
  # `perform` finishes, so signals received afterwards no longer touch this
  # job's model.
  class TrainingJob < ApplicationJob
    # Raised when training is aborted by a signal or by the inactivity monitor.
    class TrainingTimeoutError < StandardError; end

    INACTIVITY_TIMEOUT = 15 # seconds

    # Signals trapped during training, mapped to the error message that is
    # recorded on the run and raised when that signal arrives.
    TRAPPED_SIGNALS = {
      "TERM" => "Training process terminated", # graceful shutdown
      "INT" => "Training process interrupted" # Ctrl+C
    }.freeze

    # Trains the model with the given id; does nothing if no such model exists.
    #
    # @param model_id [Object] value passed to `EasyML::Model.find_by(id: ...)`
    # @raise [TrainingTimeoutError] when SIGTERM/SIGINT arrives during training
    def perform(model_id)
      @model = EasyML::Model.find_by(id: model_id)
      return if @model.nil?

      @last_activity = Time.current
      setup_signal_traps
      # @monitor_thread = start_monitor_thread

      # Every callback from the trainer counts as activity for the monitor.
      @model.actually_train do |_iteration_info|
        @last_activity = Time.current
      end
    ensure
      #   @monitor_thread&.exit
      # Signal handlers are process-wide; leaving ours installed would let a
      # later signal mark an already finished run as failed.
      restore_signal_traps
    end

    private

    # Installs the TERM/INT handlers and remembers the handlers they replace
    # so that `restore_signal_traps` can reinstate them.
    #
    # NOTE(review): `cleanup` updates records from inside the trap handler.
    # Ruby restricts some operations (e.g. locking a Mutex) in trap context;
    # confirm this is safe with the database adapter and logger in use.
    def setup_signal_traps
      @previous_traps = {}

      TRAPPED_SIGNALS.each do |signal, message|
        previous = Signal.trap(signal) do
          puts "Received SIG#{signal}, cleaning up..."
          cleanup(message)
          raise TrainingTimeoutError, message
        end
        @previous_traps[signal] = previous
      end
    end

    # Reinstates the handlers recorded by `setup_signal_traps`. Safe to call
    # when no traps were installed (e.g. the model was not found).
    def restore_signal_traps
      return if @previous_traps.nil?

      @previous_traps.each do |signal, handler|
        # Signal.trap can report nil for the previous handler; passing nil
        # back would ignore the signal, so fall back to the default action.
        Signal.trap(signal, handler || "DEFAULT")
      end
      @previous_traps = nil
    end

    # Marks the model's last run as failed and clears the training flag.
    # Runs at most once per job instance.
    #
    # @param error_message [String] message stored on the failed run
    def cleanup(error_message)
      return if @cleaned_up

      @cleaned_up = true
      # A missing run must not prevent the is_training flag from being reset.
      @model.last_run&.update(status: "failed", error_message: error_message, completed_at: Time.current)
      @model.update(is_training: false)
    end

    # Starts a thread that polls once per second and aborts training when no
    # activity has been recorded for INACTIVITY_TIMEOUT seconds. Currently not
    # started by `perform` (the call there is commented out).
    #
    # @return [Thread] the monitor thread
    def start_monitor_thread
      Thread.new do
        loop do
          puts "Monitoring activity... #{Time.current - @last_activity}"
          if Time.current - @last_activity >= INACTIVITY_TIMEOUT
            puts "Training process inactive for #{INACTIVITY_TIMEOUT} seconds, terminating..."
            cleanup("Training process timed out")
            Thread.main.raise(TrainingTimeoutError)
            break
          end
          sleep 1
        end
      end
    end
  end
end

Version data entries

27 entries across 27 versions & 1 rubygems

Version Path
easy_ml-0.2.0.pre.rc27 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc26 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc25 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc24 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc23 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc22 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc21 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc20 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc19 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc18 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc17 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc16 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc15 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc14 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc13 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc12 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc11 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc10 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc9 app/jobs/easy_ml/training_job.rb
easy_ml-0.2.0.pre.rc8 app/jobs/easy_ml/training_job.rb