lib/flapjack/executive.rb in flapjack-0.6.37 vs lib/flapjack/executive.rb in flapjack-0.6.38

- old
+ new

@@ -43,11 +43,14 @@
    @filters << Flapjack::Filters::UnscheduledMaintenance.new(options)
    @filters << Flapjack::Filters::DetectMassClientFailures.new(options)
    @filters << Flapjack::Filters::Delays.new(options)
    @filters << Flapjack::Filters::Acknowledgement.new(options)
-   @boot_time = Time.now
+   @boot_time = Time.now
+   @fqdn = `/bin/hostname -f`.chomp
+   @pid = Process.pid
+   @instance_id = "#{@fqdn}:#{@pid}"
    # FIXME: all of the below keys assume there is only ever one executive running;
    # we could generate a fuid and save it to disk, and prepend it from that
    # point on...
@@ -59,10 +62,16 @@
      @redis.hset('event_counters', 'all', 0)
      @redis.hset('event_counters', 'ok', 0)
      @redis.hset('event_counters', 'failure', 0)
      @redis.hset('event_counters', 'action', 0)
    end
+
+   @redis.zadd('executive_instances', @boot_time.to_i, @instance_id)
+   @redis.hset("event_counters:#{@instance_id}", 'all', 0)
+   @redis.hset("event_counters:#{@instance_id}", 'ok', 0)
+   @redis.hset("event_counters:#{@instance_id}", 'failure', 0)
+   @redis.hset("event_counters:#{@instance_id}", 'action', 0)
  end
  def main
    setup
@@ -71,10 +80,13 @@
    until should_quit?
      @logger.info("Waiting for event...")
      event = Flapjack::Data::Event.next(:persistence => @redis)
      process_event(event) unless event.nil?
    end
+
+   @redis.empty! if @redis
+   @logger.info("Exiting main loop.")
  end
  # this must use a separate connection to the main Executive one, as it's running
  # from a different fiber while the main one is blocking.
@@ -121,10 +133,11 @@
  def update_keys(event, entity_check)
    result = { :skip_filters => false }
    timestamp = Time.now.to_i
    @event_count = @redis.hincrby('event_counters', 'all', 1)
+   @event_count = @redis.hincrby("event_counters:#{@instance_id}", 'all', 1)
    # FIXME skip if entity_check.nil?
    # FIXME: validate that the event is sane before we ever get here
    # FIXME: create an event if there is dodgy data
@@ -135,12 +148,14 @@
    # Track when we last saw an event for a particular entity:check pair
    entity_check.last_update = timestamp
    if event.ok?
      @redis.hincrby('event_counters', 'ok', 1)
+     @redis.hincrby("event_counters:#{@instance_id}", 'ok', 1)
    elsif event.failure?
      @redis.hincrby('event_counters', 'failure', 1)
+     @redis.hincrby("event_counters:#{@instance_id}", 'failure', 1)
      @redis.hset('unacknowledged_failures', @event_count, event.id)
    end
    event.previous_state = entity_check.state
    @logger.info("No previous state for event #{event.id}") if event.previous_state.nil?
@@ -165,10 +180,11 @@
    # Action events represent human or automated interaction with Flapjack
    when 'action'
      # When an action event is processed, store the event.
      @redis.hset(event.id + ':actions', timestamp, event.state)
-     @redis.hincrby('event_counters', 'action', 1) if event.ok?
+     @redis.hincrby('event_counters', 'action', 1)
+     @redis.hincrby("event_counters:#{@instance_id}", 'action', 1)
      if event.acknowledgement? && event.acknowledgement_id
        @redis.hdel('unacknowledged_failures', event.acknowledgement_id)
      end
    when 'shutdown'