lib/rocket_job/worker.rb in rocketjob-2.1.3 vs lib/rocket_job/worker.rb in rocketjob-3.0.0.alpha
- old
+ new
@@ -1,403 +1,113 @@
# encoding: UTF-8
require 'concurrent'
+require 'forwardable'
module RocketJob
# Worker
#
- # On startup a worker instance will automatically register itself
- # if not already present
- #
- # Starting a worker in the foreground:
- # - Using a Rails runner:
- # bin/rocketjob
- #
- # Starting a worker in the background:
- # - Using a Rails runner:
- # nohup bin/rocketjob --quiet 2>&1 1>output.log &
- #
- # Stopping a worker:
- # - Stop the worker via the Web UI
- # - Send a regular kill signal to make it shutdown once all active work is complete
- # kill <pid>
- # - Or, use the following Ruby code:
- # worker = RocketJob::Worker.where(name: 'worker name').first
- # worker.stop!
- #
- # Sending the kill signal locally will result in starting the shutdown process
- # immediately. Via the UI or Ruby code the worker can take up to 15 seconds
- # (the heartbeat interval) to start shutting down.
+ # A worker runs on a single operating system thread
+ # Is usually started under a RocketJob server process.
class Worker
- include Plugins::Document
- include Plugins::StateMachine
include SemanticLogger::Loggable
+ include ActiveSupport::Callbacks
+ extend Forwardable
- # @formatter:off
- # Unique Name of this worker instance
- # Default: `host name:PID`
- # The unique name is used on re-start to re-queue any jobs that were being processed
- # at the time the worker or host unexpectedly terminated, if any
- key :name, String, default: -> { "#{SemanticLogger.host}:#{$$}" }
+ def_delegator :@thread, :alive?
+ def_delegator :@thread, :backtrace
+ def_delegator :@thread, :join
- # The maximum number of threads that this worker should use
- # If set, it will override the default value in RocketJob::Config
- key :max_threads, Integer, default: -> { Config.instance.max_worker_threads }
+ define_callbacks :running
- # When this worker process was started
- key :started_at, Time
+ attr_accessor :id, :worker_name, :inline
+ attr_reader :thread, :name
- # The heartbeat information for this worker
- has_one :heartbeat, class_name: 'RocketJob::Heartbeat'
-
- # Current state
- # Internal use only. Do not set this field directly
- key :state, Symbol, default: :starting
-
- validates_presence_of :state, :name, :max_threads
-
- # States
- # :starting -> :running -> :paused
- # -> :stopping
- aasm column: :state do
- state :starting, initial: true
- state :running
- state :paused
- state :stopping
-
- event :started do
- transitions from: :starting, to: :running
- before do
- self.started_at = Time.now
- end
- end
-
- event :pause do
- transitions from: :running, to: :paused
- end
-
- event :resume do
- transitions from: :paused, to: :running
- end
-
- event :stop do
- transitions from: :running, to: :stopping
- transitions from: :paused, to: :stopping
- transitions from: :starting, to: :stopping
- end
+ def self.before_running(*filters, &blk)
+ set_callback(:running, :before, *filters, &blk)
end
- # @formatter:on
- # Requeue any jobs being worked by this worker when it is destroyed
- before_destroy :requeue_jobs
-
- # Run the worker process
- # Attributes supplied are passed to #new
- def self.run(attrs={})
- Thread.current.name = 'rocketjob main'
- create_indexes
- register_signal_handlers
- if defined?(RocketJobPro) && (RocketJob::Job.database.name != RocketJob::Jobs::PerformanceJob.database.name)
- raise 'The RocketJob configuration is being applied after the system has been initialized'
- end
-
- worker = create!(attrs)
- if worker.max_threads == 0
- # Does not start any additional threads and runs the worker in the current thread.
- # No heartbeats are performed. So this worker will appear as a zombie in RJMC.
- # Designed for profiling purposes where a single thread is much simpler to profile.
- worker.started!
- worker.send(:worker, 0)
- else
- worker.send(:run)
- end
-
- ensure
- worker.destroy if worker
+ def self.after_running(*filters, &blk)
+ set_callback(:running, :after, *filters, &blk)
end
- # Create indexes
- def self.create_indexes
- ensure_index [[:name, 1]], background: true, unique: true
- # Also create indexes for the jobs collection
- Job.create_indexes
+ def self.around_running(*filters, &blk)
+ set_callback(:running, :around, *filters, &blk)
end
- # Destroy's all instances of zombie workers and requeues any jobs still "running"
- # on those workers
- def self.destroy_zombies
- count = 0
- each do |worker|
- next unless worker.zombie?
- logger.warn "Destroying zombie worker #{worker.name}, and requeueing its jobs"
- worker.destroy
- count += 1
+ def initialize(id: 0, server_name: 'inline', inline: false)
+ @id = id
+ @server_name = server_name
+ if defined?(Concurrent::JavaAtomicBoolean) || defined?(Concurrent::CAtomicBoolean)
+ @shutdown = Concurrent::AtomicBoolean.new(false)
+ else
+ @shutdown = false
end
- count
+ @name = "#{server_name}:#{id}"
+ @thread = Thread.new { run } unless inline
end
- # Stop all running, paused, or starting workers
- def self.stop_all
- where(state: [:running, :paused, :starting]).each(&:stop!)
- end
-
- # Pause all running workers
- def self.pause_all
- running.each(&:pause!)
- end
-
- # Resume all paused workers
- def self.resume_all
- paused.each(&:resume!)
- end
-
- # Returns [Hash<String:Integer>] of the number of workers in each state.
- # Note: If there are no workers in that particular state then the hash will not have a value for it.
- #
- # Example workers in every state:
- # RocketJob::Worker.counts_by_state
- # # => {
- # :aborted => 1,
- # :completed => 37,
- # :failed => 1,
- # :paused => 3,
- # :queued => 4,
- # :running => 1,
- # :queued_now => 1,
- # :scheduled => 3
- # }
- #
- # Example no workers active:
- # RocketJob::Worker.counts_by_state
- # # => {}
- def self.counts_by_state
- counts = {}
- collection.aggregate([
- {
- '$group' => {
- _id: '$state',
- count: {'$sum' => 1}
- }
- }
- ]
- ).each do |result|
- counts[result['_id'].to_sym] = result['count']
- end
- counts
- end
-
- # Returns [Boolean] whether the worker is shutting down
- def shutting_down?
- self.class.shutdown? || !running?
- end
-
- # Returns [true|false] if this worker has missed at least the last 4 heartbeats
- #
- # Possible causes for a worker to miss its heartbeats:
- # - The worker process has died
- # - The worker process is "hanging"
- # - The worker is no longer able to communicate with the MongoDB Server
- def zombie?(missed = 4)
- return false unless running? || stopping?
- return true if heartbeat.nil? || heartbeat.updated_at.nil?
- dead_seconds = Config.instance.heartbeat_seconds * missed
- (Time.now - heartbeat.updated_at) >= dead_seconds
- end
-
- # On MRI the 'concurrent-ruby-ext' gem may not be loaded
if defined?(Concurrent::JavaAtomicBoolean) || defined?(Concurrent::CAtomicBoolean)
- # Returns [true|false] whether the shutdown indicator has been set for this worker process
- def self.shutdown?
- @@shutdown.value
+ # Tells this worker to shutdown as soon the current job/slice is complete
+ def shutdown!
+ @shutdown.make_true
end
- # Set shutdown indicator for this worker process
- def self.shutdown!
- @@shutdown.make_true
+ def shutdown?
+ @shutdown.value
end
-
- @@shutdown = Concurrent::AtomicBoolean.new(false)
else
- # Returns [true|false] whether the shutdown indicator has been set for this worker process
- def self.shutdown?
- @@shutdown
+ def shutdown!
+ @shutdown = true
end
- # Set shutdown indicator for this worker process
- def self.shutdown!
- @@shutdown = true
+ def shutdown?
+ @shutdown
end
-
- @@shutdown = false
end
private
- attr_reader :worker_threads
-
- # Returns [Array<Thread>] collection of created worker threads
- def worker_threads
- @worker_threads ||= []
- end
-
- # Management Thread
+ # Process jobs until it shuts down
+ #
+ # Params
+ # worker_id [Integer]
+ # The number of this worker for logging purposes
def run
- logger.info "Using MongoDB Database: #{RocketJob::Job.database.name}"
- build_heartbeat(updated_at: Time.now, current_threads: 0)
- started!
- adjust_worker_threads(true)
- logger.info "RocketJob Worker started with #{max_threads} workers running"
-
- count = 0
- while running? || paused?
- sleep Config.instance.heartbeat_seconds
-
- update_attributes_and_reload(
- 'heartbeat.updated_at' => Time.now,
- 'heartbeat.current_threads' => worker_count
- )
-
- # In case number of threads has been modified
- adjust_worker_threads
-
- # Stop worker if shutdown indicator was set
- stop! if self.class.shutdown? && may_stop?
- end
-
- logger.info 'Waiting for worker threads to stop'
- while thread = worker_threads.first
- if thread.join(5)
- # Worker thread is dead
- worker_threads.shift
- else
- # Timeout waiting for thread to stop
- begin
- update_attributes_and_reload(
- 'heartbeat.updated_at' => Time.now,
- 'heartbeat.current_threads' => worker_count
- )
- rescue MongoMapper::DocumentNotFound
- logger.warn('Worker has been destroyed. Going down hard!')
- break
- end
- end
- end
- logger.info 'Shutdown'
- rescue Exception => exc
- logger.error('RocketJob::Worker is stopping due to an exception', exc)
- end
-
- # Returns [Fixnum] number of workers (threads) that are alive
- def worker_count
- worker_threads.count { |i| i.alive? }
- end
-
- def next_worker_id
- @worker_id ||= 0
- @worker_id += 1
- end
-
- # Re-adjust the number of running threads to get it up to the
- # required number of threads
- # Parameters
- # stagger_threads
- # Whether to stagger when the threads poll for work the first time
- # It spreads out the queue polling over the max_poll_seconds so
- # that not all workers poll at the same time
- # The worker also respond faster than max_poll_seconds when a new
- # job is added.
- def adjust_worker_threads(stagger_threads=false)
- count = worker_count
- # Cleanup threads that have stopped
- if count != worker_threads.count
- logger.info "Cleaning up #{worker_threads.count - count} threads that went away"
- worker_threads.delete_if { |t| !t.alive? }
- end
-
- return if shutting_down?
-
- # Need to add more threads?
- if count < max_threads
- thread_count = max_threads - count
- logger.info "Starting #{thread_count} threads"
- thread_count.times.each do
- # Start worker thread
- worker_threads << Thread.new(next_worker_id) do |id|
- begin
- sleep (Config.instance.max_poll_seconds.to_f / max_threads) * (id - 1) if stagger_threads
- worker(id)
- rescue Exception => exc
- logger.fatal('Cannot start worker thread', exc)
- end
- end
- end
- end
- end
-
- # Keep processing jobs until worker stops running
- def worker(worker_id)
- Thread.current.name = 'rocketjob %03i' % worker_id
+ Thread.current.name = 'rocketjob %03i' % id
logger.info 'Started'
- while !shutting_down?
+ while !shutdown?
if process_available_jobs
# Keeps workers staggered across the poll interval so that
# all workers don't poll at the same time
sleep rand(RocketJob::Config.instance.max_poll_seconds * 1000) / 1000
else
- break if shutting_down?
+ break if shutdown?
sleep RocketJob::Config.instance.max_poll_seconds
end
end
- logger.info "Stopping. Worker state: #{state.inspect}"
+ logger.info 'Stopping'
rescue Exception => exc
logger.fatal('Unhandled exception in job processing thread', exc)
ensure
+ # TODO: Move to after_running callback
ActiveRecord::Base.clear_active_connections! if defined?(ActiveRecord::Base)
end
# Process the next available job
# Returns [Boolean] whether any job was actually processed
def process_available_jobs
skip_job_ids = []
processed = false
- while (job = Job.rocket_job_next_job(name, skip_job_ids)) && !shutting_down?
+ while (job = Job.rocket_job_next_job(worker_name, skip_job_ids)) && !shutdown?
logger.fast_tag("job:#{job.id}") do
if job.rocket_job_work(self)
# Need to skip the specified job due to throttling or no work available
skip_job_ids << job.id
else
processed = true
end
end
end
processed
- end
-
- # Register handlers for the various signals
- # Term:
- # Perform clean shutdown
- #
- def self.register_signal_handlers
- begin
- Signal.trap 'SIGTERM' do
- shutdown!
- message = 'Shutdown signal (SIGTERM) received. Will shutdown as soon as active jobs/slices have completed.'
- # Logging uses a mutex to access Queue on MRI/CRuby
- defined?(JRuby) ? logger.warn(message) : puts(message)
- end
-
- Signal.trap 'INT' do
- shutdown!
- message = 'Shutdown signal (INT) received. Will shutdown as soon as active jobs/slices have completed.'
- # Logging uses a mutex to access Queue on MRI/CRuby
- defined?(JRuby) ? logger.warn(message) : puts(message)
- end
- rescue StandardError
- logger.warn 'SIGTERM handler not installed. Not able to shutdown gracefully'
- end
- end
-
- # Requeue any jobs assigned to this worker when it is destroyed
- def requeue_jobs
- RocketJob::Job.requeue_dead_worker(name)
end
end
end