lib/flapjack/executive.rb in flapjack-0.6.39 vs lib/flapjack/executive.rb in flapjack-0.6.40

- old
+ new

@@ -10,24 +10,30 @@ require 'flapjack/filters/unscheduled_maintenance' require 'flapjack/filters/detect_mass_client_failures' require 'flapjack/filters/delays' require 'flapjack/data/contact' require 'flapjack/data/entity_check' +require 'flapjack/data/notification' require 'flapjack/data/event' -require 'flapjack/notification/common' require 'flapjack/notification/sms' require 'flapjack/notification/email' require 'flapjack/pikelet' +require 'flapjack/redis_pool' module Flapjack class Executive - include Flapjack::Pikelet + include Flapjack::GenericPikelet - def setup - @redis = build_redis_connection_pool + alias_method :generic_bootstrap, :bootstrap + alias_method :generic_cleanup, :cleanup + def bootstrap(opts = {}) + generic_bootstrap(opts) + + @redis = Flapjack::RedisPool.new(:config => opts[:redis_config], :size => 1) + @queues = {:email => @config['email_queue'], :sms => @config['sms_queue'], :jabber => @config['jabber_queue'], :pagerduty => @config['pagerduty_queue']} @@ -71,23 +77,24 @@ @redis.hset("event_counters:#{@instance_id}", 'ok', 0) @redis.hset("event_counters:#{@instance_id}", 'failure', 0) @redis.hset("event_counters:#{@instance_id}", 'action', 0) end - def main - setup + def cleanup + @redis.empty! if @redis + generic_cleanup + end + def main @logger.info("Booting main loop.") - until should_quit? + until should_quit? && @received_shutdown @logger.info("Waiting for event...") - event = Flapjack::Data::Event.next(:persistence => @redis) + event = Flapjack::Data::Event.next(:redis => @redis) process_event(event) unless event.nil? end - @redis.empty! if @redis - @logger.info("Exiting main loop.") end # this must use a separate connection to the main Executive one, as it's running # from a different fiber while the main one is blocking. @@ -100,11 +107,12 @@ end private def process_event(event) - @logger.debug("#{Flapjack::Data::Event.pending_count(:persistence => @redis)} events waiting on the queue") + pending = Flapjack::Data::Event.pending_count(:redis => @redis) + @logger.debug("#{pending} events waiting on the queue") @logger.debug("Raw event received: #{event.inspect}") time_at = event.time time_at_str = time_at ? ", #{Time.at(time_at).to_s}" : '' @logger.info("Processing Event: #{event.id}, #{event.type}, #{event.state}, #{event.summary}#{time_at_str}") @@ -127,18 +135,18 @@ @logger.info("#{Time.now}: Not sending notifications for event #{event.id} because these filters blocked: #{blocker_names.join(', ')}") return end @logger.info("#{Time.now}: Sending notifications for event #{event.id}") - generate_notification(event, entity_check) + send_notification_messages(event, entity_check) end def update_keys(event, entity_check) result = { :skip_filters => false } timestamp = Time.now.to_i @event_count = @redis.hincrby('event_counters', 'all', 1) - @event_count = @redis.hincrby("event_counters:#{@instance_id}", 'all', 1) + @redis.hincrby("event_counters:#{@instance_id}", 'all', 1) # FIXME skip if entity_check.nil? # FIXME: validate that the event is sane before we ever get here # FIXME: create an event if there is dodgy data @@ -189,20 +197,19 @@ if event.acknowledgement? && event.acknowledgement_id @redis.hdel('unacknowledged_failures', event.acknowledgement_id) end when 'shutdown' # should this be logged as an action instead? being minimally invasive for now - result[:shutdown] = true + result[:shutdown] = @received_shutdown = true end result end # takes an event for which a notification needs to be generated, works out the type of - # notification, updates the notification history in redis, calls other methods to work out who - # to notify, by what method, and finally to have the notifications sent - def generate_notification(event, entity_check) + # notification, updates the notification history in redis, sends the notifications + def send_notification_messages(event, entity_check) timestamp = Time.now.to_i notification_type = 'unknown' case event.type when 'service' case event.state @@ -213,82 +220,54 @@ end when 'action' case event.state when 'acknowledgement' notification_type = 'acknowledgement' + when 'test_notifications' + notification_type = 'test' end end @redis.set("#{event.id}:last_#{notification_type}_notification", timestamp) @redis.rpush("#{event.id}:#{notification_type}_notifications", timestamp) @logger.debug("Notification of type #{notification_type} is being generated for #{event.id}.") - send_notifications(event, notification_type, entity_check.contacts) - end + contacts = entity_check.contacts - # takes an event, a notification type, and an array of contacts and creates jobs in resque - # (eventually) for each notification - def send_notifications(event, notification_type, contacts) - notification = { 'event_id' => event.id, - 'state' => event.state, - 'summary' => event.summary, - 'time' => event.time, - 'notification_type' => notification_type } - if contacts.empty? @notifylog.info("#{Time.now.to_s} | #{event.id} | #{notification_type} | NO CONTACTS") return - end + end - contacts.each {|contact| + notification = Flapjack::Data::Notification.for_event(event, :type => notification_type) - if contact.media.empty? - @notifylog.info("#{Time.now.to_s} | #{event.id} | #{notification_type} | #{contact.id} | NO MEDIA FOR CONTACT") + notification.messages(:contacts => contacts).each do |msg| + media_type = msg.medium.to_sym + + @notifylog.info("#{Time.now.to_s} | #{event.id} | " + + "#{notification_type} | #{msg.contact.id} | #{media_type.to_s} | #{msg.address}") + + unless @queues[media_type] + # TODO log error next end - notification.merge!({'contact_id' => contact.id, - 'contact_first_name' => contact.first_name, - 'contact_last_name' => contact.last_name, }) + contents = msg.contents - contact.media.each_pair {|media_type, address| + # TODO consider changing Resque jobs to use raw blpop like the others + case media_type + when :sms + Resque.enqueue_to(@queues[:sms], Flapjack::Notification::Sms, contents) + when :email + Resque.enqueue_to(@queues[:email], Flapjack::Notification::Email, contents) + when :jabber + # TODO move next line up into other notif value setting above? + contents['event_count'] = @event_count if @event_count + @redis.rpush(@queues[:jabber], Yajl::Encoder.encode(contents)) + when :pagerduty + @redis.rpush(@queues[:pagerduty], Yajl::Encoder.encode(contents)) + end - @notifylog.info("#{Time.now.to_s} | #{event.id} | " + - "#{notification_type} | #{contact.id} | #{media_type} | #{address}") - - # queue this notification - notif = notification.dup - notif['media'] = media_type - notif['address'] = address - notif['id'] = fuid - dur = event.duration - notif['duration'] = dur if dur - @logger.debug("send_notifications: sending notification: #{notif.inspect}") - - unless @queues[media_type.to_sym] - # TODO log error - next - end - - # TODO consider changing Resque jobs to use raw blpop like the others - case media_type - when "sms" - Resque.enqueue_to(@queues[:sms], Notification::Sms, notif) - when "email" - Resque.enqueue_to(@queues[:email], Notification::Email, notif) - when "jabber" - # TODO move next line up into other notif value setting above? - notif['event_count'] = @event_count if @event_count - @redis.rpush(@queues[:jabber], Yajl::Encoder.encode(notif)) - when "pagerduty" - @redis.rpush(@queues[:pagerduty], Yajl::Encoder.encode(notif)) - end - } - } - end - - # generates a fairly unique identifier to use as a message id - def fuid - fuid = self.object_id.to_i.to_s + '-' + Time.now.to_i.to_s + '.' + Time.now.tv_usec.to_s + end end end end