lib/flapjack/executive.rb in flapjack-0.6.37 vs lib/flapjack/executive.rb in flapjack-0.6.38
- old
+ new
@@ -43,11 +43,14 @@
@filters << Flapjack::Filters::UnscheduledMaintenance.new(options)
@filters << Flapjack::Filters::DetectMassClientFailures.new(options)
@filters << Flapjack::Filters::Delays.new(options)
@filters << Flapjack::Filters::Acknowledgement.new(options)
- @boot_time = Time.now
+ @boot_time = Time.now
+ @fqdn = `/bin/hostname -f`.chomp
+ @pid = Process.pid
+ @instance_id = "#{@fqdn}:#{@pid}"
# FIXME: all of the below keys assume there is only ever one executive running;
# we could generate a fuid and save it to disk, and prepend it from that
# point on...
@@ -59,10 +62,16 @@
@redis.hset('event_counters', 'all', 0)
@redis.hset('event_counters', 'ok', 0)
@redis.hset('event_counters', 'failure', 0)
@redis.hset('event_counters', 'action', 0)
end
+
+ @redis.zadd('executive_instances', @boot_time.to_i, @instance_id)
+ @redis.hset("event_counters:#{@instance_id}", 'all', 0)
+ @redis.hset("event_counters:#{@instance_id}", 'ok', 0)
+ @redis.hset("event_counters:#{@instance_id}", 'failure', 0)
+ @redis.hset("event_counters:#{@instance_id}", 'action', 0)
end
def main
setup
@@ -71,10 +80,13 @@
until should_quit?
@logger.info("Waiting for event...")
event = Flapjack::Data::Event.next(:persistence => @redis)
process_event(event) unless event.nil?
end
+
+ @redis.empty! if @redis
+
@logger.info("Exiting main loop.")
end
# This must use a connection separate from the main Executive one, as it's running
# from a different fiber while the main one is blocking.
@@ -121,10 +133,11 @@
def update_keys(event, entity_check)
result = { :skip_filters => false }
timestamp = Time.now.to_i
@event_count = @redis.hincrby('event_counters', 'all', 1)
+ @event_count = @redis.hincrby("event_counters:#{@instance_id}", 'all', 1)
# FIXME skip if entity_check.nil?
# FIXME: validate that the event is sane before we ever get here
# FIXME: create an event if there is dodgy data
@@ -135,12 +148,14 @@
# Track when we last saw an event for a particular entity:check pair
entity_check.last_update = timestamp
if event.ok?
@redis.hincrby('event_counters', 'ok', 1)
+ @redis.hincrby("event_counters:#{@instance_id}", 'ok', 1)
elsif event.failure?
@redis.hincrby('event_counters', 'failure', 1)
+ @redis.hincrby("event_counters:#{@instance_id}", 'failure', 1)
@redis.hset('unacknowledged_failures', @event_count, event.id)
end
event.previous_state = entity_check.state
@logger.info("No previous state for event #{event.id}") if event.previous_state.nil?
@@ -165,10 +180,11 @@
# Action events represent human or automated interaction with Flapjack
when 'action'
# When an action event is processed, store the event.
@redis.hset(event.id + ':actions', timestamp, event.state)
- @redis.hincrby('event_counters', 'action', 1) if event.ok?
+ @redis.hincrby('event_counters', 'action', 1)
+ @redis.hincrby("event_counters:#{@instance_id}", 'action', 1)
if event.acknowledgement? && event.acknowledgement_id
@redis.hdel('unacknowledged_failures', event.acknowledgement_id)
end
when 'shutdown'