lib/flapjack/executive.rb in flapjack-0.6.61 vs lib/flapjack/executive.rb in flapjack-0.7.0
- old
+ new
@@ -1,9 +1,11 @@
#!/usr/bin/env ruby
require 'log4r'
require 'log4r/outputter/fileoutputter'
+require 'tzinfo'
+require 'active_support/time'
require 'flapjack/filters/acknowledgement'
require 'flapjack/filters/ok'
require 'flapjack/filters/scheduled_maintenance'
require 'flapjack/filters/unscheduled_maintenance'
@@ -12,18 +14,21 @@
require 'flapjack/data/contact'
require 'flapjack/data/entity_check'
require 'flapjack/data/notification'
require 'flapjack/data/event'
require 'flapjack/redis_pool'
+require 'flapjack/utility'
require 'flapjack/gateways/email'
require 'flapjack/gateways/sms_messagenet'
module Flapjack
class Executive
+ include Flapjack::Utility
+
def initialize(opts = {})
@config = opts[:config]
@redis_config = opts[:redis_config]
@logger = opts[:logger]
@redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2) # first will block
@@ -32,13 +37,28 @@
:sms => @config['sms_queue'],
:jabber => @config['jabber_queue'],
:pagerduty => @config['pagerduty_queue']}
notifylog = @config['notification_log_file'] || 'log/notify.log'
+ if not File.directory?(File.dirname(notifylog))
+ puts "Parent directory for log file #{notifylog} doesn't exist"
+ puts "Exiting!"
+ exit
+ end
@notifylog = Log4r::Logger.new("executive")
@notifylog.add(Log4r::FileOutputter.new("notifylog", :filename => notifylog))
+ # Resolve the timezone used as the :default when looking up a contact's timezone:
+ # the config value wins, then the TZ environment variable, then UTC.
+ tz_string = @config['default_contact_timezone'] || ENV['TZ'] || 'UTC'
+ tz = begin
+   ActiveSupport::TimeZone.new(tz_string)
+ rescue ArgumentError
+   nil
+ end
+ # ActiveSupport::TimeZone.new is documented to return nil for a name it does not
+ # recognise, so a nil result is handled the same way as a raised ArgumentError
+ # instead of silently storing nil as the default timezone.
+ if tz.nil?
+   # report through @logger (assigned from opts[:logger] above); a bare `logger`
+   # is not defined anywhere in the visible code
+   @logger.error("Invalid timezone string specified in default_contact_timezone or TZ (#{tz_string})")
+   exit 1
+ end
+ @default_contact_timezone = tz
+
# FIXME: Put loading filters into separate method
# FIXME: should we make the filters more configurable by the end user?
options = { :log => opts[:logger], :persistence => @redis }
@filters = []
@filters << Flapjack::Filters::Ok.new(options)
@@ -126,11 +146,11 @@
@logger.debug("Not generating notifications for event #{event.id} because this filter blocked: #{blocker.name}")
return
end
@logger.info("Generating notifications for event #{event.id}, #{event.type}, #{event.state}, #{event.summary}#{time_at_str}")
- send_notification_messages(event, entity_check)
+ generate_notification_messages(event, entity_check)
end
def update_keys(event, entity_check)
result = { :skip_filters => false }
timestamp = Time.now.to_i
@@ -196,12 +216,12 @@
result
end
# takes an event for which a notification needs to be generated, works out the type of
- # notification, updates the notification history in redis, sends the notifications
- def send_notification_messages(event, entity_check)
+ # notification, updates the notification history in redis, generates the notifications
+ def generate_notification_messages(event, entity_check)
timestamp = Time.now.to_i
notification_type = 'unknown'
case event.type
when 'service'
case event.state
@@ -217,11 +237,13 @@
when 'test_notifications'
notification_type = 'test'
end
end
@redis.set("#{event.id}:last_#{notification_type}_notification", timestamp)
+ @redis.set("#{event.id}:last_#{event.state}_notification", timestamp) if event.failure?
@redis.rpush("#{event.id}:#{notification_type}_notifications", timestamp)
+ @redis.rpush("#{event.id}:#{event.state}_notifications", timestamp) if event.failure?
@logger.debug("Notification of type #{notification_type} is being generated for #{event.id}.")
contacts = entity_check.contacts
if contacts.empty?
@@ -229,25 +251,137 @@
return
end
notification = Flapjack::Data::Notification.for_event(event, :type => notification_type)
- notification.messages(:contacts => contacts).each do |msg|
- media_type = msg.medium.to_sym
+ enqueue_messages( apply_notification_rules( notification.messages(:contacts => contacts) ) )
- @notifylog.info("#{Time.now.to_s} | #{event.id} | " +
- "#{notification_type} | #{msg.contact.id} | #{media_type.to_s} | #{msg.address}")
+ end
- unless @queues[media_type]
+ # Tells whether a notification rule applies at the present moment.
+ #
+ # A rule with nil or empty time_restrictions always applies. Otherwise the rule
+ # applies when at least one of its time restrictions is occurring right now.
+ # Each restriction is converted to an IceCube schedule hash using the contact's
+ # timezone (falling back to @default_contact_timezone), and "now" is taken in
+ # that same timezone.
+ #
+ # rule - responds to time_restrictions
+ # opts - hash; opts[:contact] must respond to timezone(:default => zone)
+ #
+ # Returns true or false.
+ def rule_occurring_now?(rule, opts)
+   contact = opts[:contact]
+   unrestricted = rule.time_restrictions.nil? || rule.time_restrictions.empty?
+   return true if unrestricted
+
+   zone = contact.timezone(:default => @default_contact_timezone)
+   now_for_contact = zone.now
+
+   # any? already yields a strict boolean, so no further coercion is required
+   rule.time_restrictions.any? do |restriction|
+     # attach the contact's timezone to the time restriction hash
+     schedule_hash = Flapjack::Data::NotificationRule.time_restriction_to_ice_cube_hash(restriction, zone)
+     IceCube::Schedule.from_hash(schedule_hash).occurring_at?(now_for_contact)
+   end
+ end
+
+ # Filters candidate messages down to those allowed by each contact's
+ # notification rules. The stages run in this order:
+ #   1. collect the contact's rules that match the event's entity and the current time
+ #   2. if more than one rule matched and any of them is specific (entities or
+ #      entity_tags set), discard the general rules (both unset)
+ #   3. drop the message if any remaining rule blackholes the event state
+ #   4. drop the message unless some remaining rule lists the message's medium for
+ #      the event state (contacts that have no rules at all skip this check)
+ #   5. drop the message if the contact's drop_notifications? says so for this
+ #      medium, check and state
+ #
+ # messages - array of objects responding to medium, contact and notification
+ #
+ # Returns the array of messages that passed every stage.
+ def apply_notification_rules(messages)
+   @logger.debug "apply_notification_rules: got messages with size #{messages.size}"
+
+   # first get all rules matching entity and time
+   tuple = messages.map do |message|
+     @logger.debug "considering message: #{message.medium} #{message.notification.event.id} #{message.notification.event.state}"
+     @logger.debug "contact_id: #{message.contact.id}"
+     rules = message.contact.notification_rules
+     @logger.debug "found #{rules.length} rules for this message's contact"
+     event_id = message.notification.event.id
+     options = {}
+     # don't consider notification rules if the contact has none
+     options[:no_rules_for_contact] = true if rules.empty?
+     matchers = rules.find_all do |rule|
+       rule.match_entity?(event_id) && rule_occurring_now?(rule, :contact => message.contact)
+     end
+     [message, matchers, options]
+   end
+
+   # matchers are rules of the contact that have matched the current event
+   # for time and entity
+
+   @logger.debug "apply_notification_rules: num messages after entity and time matching: #{tuple.size}"
+
+   # delete the matcher for all entities if there are more specific matchers
+   tuple = tuple.map do |message, matchers, options|
+     have_specific = matchers.length > 1 &&
+       matchers.any? { |matcher| matcher.entities || matcher.entity_tags }
+     if have_specific
+       # Remove the general rules outright. The earlier map! with
+       # `a.nil? and b.nil? ? nil : matcher` parsed as `a.nil? and (b.nil? ? ...)`
+       # and left nil/false entries behind, which the blackhole stage below then
+       # tried to call blackhole? on.
+       matchers = matchers.reject do |matcher|
+         matcher.entities.nil? && matcher.entity_tags.nil?
+       end
+     end
+     [message, matchers, options]
+   end
+
+   # delete media based on blackholes
+   tuple = tuple.find_all do |message, matchers, options|
+     severity = message.notification.event.state
+     # or use message.notification.contents['state']
+     matchers.none? { |matcher| matcher.blackhole?(severity) }
+   end
+
+   @logger.debug "apply_notification_rules: num messages after removing blackhole matches: #{tuple.size}"
+
+   # delete any media that doesn't meet severity<->media constraints
+   tuple = tuple.find_all do |message, matchers, options|
+     severity = message.notification.event.state
+     options[:no_rules_for_contact] ||
+       matchers.any? do |matcher|
+         media = matcher.media_for_severity(severity)
+         if media.nil?
+           # the warning text implies nil is a possible result; treat it as
+           # "no media for this severity" rather than calling include? on nil
+           @logger.warn("got nil for matcher.media_for_severity(#{severity}), matcher: #{matcher.inspect}")
+           false
+         else
+           media.include?(message.medium)
+         end
+       end
+   end
+
+   @logger.debug "apply_notification_rules: num messages after severity-media constraints: #{tuple.size}"
+
+   # delete media based on notification interval
+   tuple = tuple.reject do |message, matchers, options|
+     message.contact.drop_notifications?(:media => message.medium,
+                                         :check => message.notification.event.id,
+                                         :state => message.notification.event.state)
+   end
+
+   @logger.debug "apply_notification_rules: num messages after pruning for notification intervals: #{tuple.size}"
+
+   # only the messages themselves are handed back to the caller
+   tuple.map { |message, _matchers, _options| message }
+ end
+
+ def enqueue_messages(messages)
+
+ messages.each do |message|
+ media_type = message.medium
+ contents = message.contents
+ event_id = message.notification.event.id
+
+ @notifylog.info("#{Time.now.to_s} | #{event_id} | " +
+ "#{message.notification.type} | #{message.contact.id} | #{media_type} | #{message.address}")
+
+ unless @queues[media_type.to_sym]
@logger.error("no queue for media type: #{media_type}")
- next
+ return
end
- contents = msg.contents
+ @logger.info("Enqueueing #{media_type} alert for #{event_id} to #{message.address}")
+ message.contact.update_sent_alert_keys(:media => message.medium,
+ :check => message.notification.event.id,
+ :state => message.notification.event.state)
+ # drop_alerts_for_contact:#{self.id}:#{media}:#{check}:#{state}
+
# TODO consider changing Resque jobs to use raw blpop like the others
- case media_type
+ case media_type.to_sym
when :sms
Resque.enqueue_to(@queues[:sms], Flapjack::Gateways::SmsMessagenet, contents)
when :email
Resque.enqueue_to(@queues[:email], Flapjack::Gateways::Email, contents)
when :jabber
@@ -255,11 +389,11 @@
contents['event_count'] = @event_count if @event_count
@redis.rpush(@queues[:jabber], Yajl::Encoder.encode(contents))
when :pagerduty
@redis.rpush(@queues[:pagerduty], Yajl::Encoder.encode(contents))
end
-
end
- end
+
+ end
end
end