lib/flapjack/gateways/pagerduty.rb in flapjack-0.6.53 vs lib/flapjack/gateways/pagerduty.rb in flapjack-0.6.54
- old
+ new
@@ -7,72 +7,60 @@
require 'flapjack/data/entity_check'
require 'flapjack/data/global'
require 'flapjack/redis_pool'
-require 'flapjack/gateways/base'
-
module Flapjack
module Gateways
class Pagerduty
- include Flapjack::Gateways::Generic
-
PAGERDUTY_EVENTS_API_URL = 'https://events.pagerduty.com/generic/2010-04-15/create_event.json'
SEM_PAGERDUTY_ACKS_RUNNING = 'sem_pagerduty_acks_running'
- alias_method :generic_bootstrap, :bootstrap
- alias_method :generic_cleanup, :cleanup
-
# Sets up the gateway from an options hash. The new `initialize` reads
# opts[:config], opts[:logger] and opts[:redis_config] directly and logs through
# @logger; the old `bootstrap` called generic_bootstrap(opts) first and logged
# through the `logger` helper. Both versions open a Flapjack::RedisPool from
# @redis_config and reset @pagerduty_acks_started to nil.
- def bootstrap(opts = {})
- generic_bootstrap(opts)
-
+ def initialize(opts = {})
+ @config = opts[:config]
+ @logger = opts[:logger]
@redis_config = opts[:redis_config]
# Pool size goes from 1 to 2 in the new version.
# NOTE(review): "first will block" presumably refers to the blpop(queue, 0) call
# in the main loop tying up one connection, leaving the second for the
# acknowledgement timer and the shutdown push -- confirm.
- @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 1)
+ @redis = Flapjack::RedisPool.new(:config => @redis_config, :size => 2) # first will block
- logger.debug("New Pagerduty pikelet with the following options: #{@config.inspect}")
+ @logger.debug("New Pagerduty pikelet with the following options: #{@config.inspect}")
@pagerduty_acks_started = nil
+ super()
end
# Shutdown handling. The old `cleanup` emptied the @redis and @redis_timer pools
# (when set) and then called generic_cleanup. The new `stop` logs "stopping",
# sets @should_quit, and pushes a 'shutdown' notification onto @config['queue'].
- def cleanup
- @redis.empty! if @redis
- @redis_timer.empty! if @redis_timer
- generic_cleanup
+ def stop
+ @logger.info("stopping")
+ @should_quit = true
# The main loop in `start` sits in blpop(queue, 0) on this same queue, so a
# message has to arrive before it can return and re-check @should_quit.
+ @redis.rpush(@config['queue'], JSON.generate('notification_type' => 'shutdown'))
end
# Removed in the new version. Pushed a 'shutdown' notification onto
# @config['queue'] using the connection passed as opts[:redis], and returned
# early without pushing when no :redis option was given.
- def add_shutdown_event(opts = {})
- return unless redis = opts[:redis]
- redis.rpush(@config['queue'], JSON.generate('notification_type' => 'shutdown'))
- end
-
- def main
- logger.debug("pagerduty gateway - commencing main method")
+ def start
+ @logger.info("starting")
while not test_pagerduty_connection do
- logger.error("Can't connect to the pagerduty API, retrying after 10 seconds")
+ @logger.error("Can't connect to the pagerduty API, retrying after 10 seconds")
EM::Synchrony.sleep(10)
end
# TODO: only clear this if there isn't another pagerduty gateway instance running
# or better, include an instance ID in the semaphore key name
@redis.del(SEM_PAGERDUTY_ACKS_RUNNING)
acknowledgement_timer = EM::Synchrony.add_periodic_timer(10) do
- @redis_timer ||= Flapjack::RedisPool.new(:config => @redis_config, :size => 1)
find_pagerduty_acknowledgements_if_safe
end
queue = @config['queue']
events = {}
- until should_quit?
- logger.debug("pagerduty gateway is going into blpop mode on #{queue}")
+ until @should_quit
+ @logger.debug("pagerduty gateway is going into blpop mode on #{queue}")
events[queue] = @redis.blpop(queue, 0)
event = Yajl::Parser.parse(events[queue][1])
type = event['notification_type']
- logger.debug("pagerduty notification event popped off the queue: " + event.inspect)
+ @logger.debug("pagerduty notification event popped off the queue: " + event.inspect)
unless 'shutdown'.eql?(type)
event_id = event['event_id']
entity, check = event_id.split(':')
state = event['state']
summary = event['summary']
@@ -115,22 +103,22 @@
# ensure we're the only instance of the pagerduty acknowledgement check running (with a naive
# timeout of five minutes to guard against stale locks caused by crashing code) either in this
# process or in other processes
if (@pagerduty_acks_started and @pagerduty_acks_started > (Time.now.to_i - 300)) or
- @redis_timer.get(SEM_PAGERDUTY_ACKS_RUNNING) == 'true'
- logger.debug("skipping looking for acks in pagerduty as this is already happening")
+ @redis.get(SEM_PAGERDUTY_ACKS_RUNNING) == 'true'
+ @logger.debug("skipping looking for acks in pagerduty as this is already happening")
return
end
@pagerduty_acks_started = Time.now.to_i
- @redis_timer.set(SEM_PAGERDUTY_ACKS_RUNNING, 'true')
- @redis_timer.expire(SEM_PAGERDUTY_ACKS_RUNNING, 300)
+ @redis.set(SEM_PAGERDUTY_ACKS_RUNNING, 'true')
+ @redis.expire(SEM_PAGERDUTY_ACKS_RUNNING, 300)
find_pagerduty_acknowledgements
- @redis_timer.del(SEM_PAGERDUTY_ACKS_RUNNING)
+ @redis.del(SEM_PAGERDUTY_ACKS_RUNNING)
@pagerduty_acks_started = nil
end
private
@@ -139,29 +127,28 @@
"incident_key" => "Flapjack is running a NOOP",
"event_type" => "nop",
"description" => "I love APIs with noops." }
code, results = send_pagerduty_event(noop)
return true if code == 200 && results['status'] =~ /success/i
- logger.error "Error: test_pagerduty_connection: API returned #{code.to_s} #{results.inspect}"
+ @logger.error "Error: test_pagerduty_connection: API returned #{code.to_s} #{results.inspect}"
false
end
# POSTs `event`, JSON-encoded with Yajl, as the request body to
# PAGERDUTY_EVENTS_API_URL.
#
# Returns a two-element array [status, response]: the HTTP status taken from the
# response header, and the response body parsed from JSON.
#
# NOTE(review): the parse below is not wrapped in a rescue, unlike the
# Yajl::ParseError rescue used for the pagerduty_acknowledged? request further
# down the file -- a non-JSON body would presumably raise here; confirm.
def send_pagerduty_event(event)
options = { :body => Yajl::Encoder.encode(event) }
http = EM::HttpRequest.new(PAGERDUTY_EVENTS_API_URL).post(options)
response = Yajl::Parser.parse(http.response)
status = http.response_header.status
# The only change in this method: logging goes through @logger instead of the
# `logger` helper.
- logger.debug "send_pagerduty_event got a return code of #{status.to_s} - #{response.inspect}"
+ @logger.debug "send_pagerduty_event got a return code of #{status.to_s} - #{response.inspect}"
[status, response]
end
def find_pagerduty_acknowledgements
+ @logger.debug("looking for acks in pagerduty for unack'd problems")
- logger.debug("looking for acks in pagerduty for unack'd problems")
+ unacknowledged_failing_checks = Flapjack::Data::Global.unacknowledged_failing_checks(:redis => @redis)
- unacknowledged_failing_checks = Flapjack::Data::Global.unacknowledged_failing_checks(:redis => @redis_timer)
-
@logger.debug "found unacknowledged failing checks as follows: " + unacknowledged_failing_checks.join(', ')
unacknowledged_failing_checks.each do |entity_check|
# If more than one contact for this entity_check has pagerduty
@@ -188,11 +175,11 @@
@logger.debug "#{entity_check.entity_name}:#{check} is not acknowledged in pagerduty, skipping"
next
end
pg_acknowledged_by = acknowledged[:pg_acknowledged_by]
- @logger.debug "#{entity_check.entity_name}:#{check} is acknowledged in pagerduty, creating flapjack acknowledgement... "
+ @logger.info "#{entity_check.entity_name}:#{check} is acknowledged in pagerduty, creating flapjack acknowledgement... "
who_text = ""
if !pg_acknowledged_by.nil? && !pg_acknowledged_by['name'].nil?
who_text = " by #{pg_acknowledged_by['name']}"
end
entity_check.create_acknowledgement('summary' => "Acknowledged on PagerDuty" + who_text)
@@ -221,15 +208,9 @@
@logger.debug("pagerduty_acknowledged?: request to #{url}")
@logger.debug("pagerduty_acknowledged?: query: #{query.inspect}")
@logger.debug("pagerduty_acknowledged?: auth: #{options[:head].inspect}")
http = EM::HttpRequest.new(url).get(options)
- # DEBUG flapjack-pagerduty: pagerduty_acknowledged?: decoded response as:
- # {"incidents"=>[{"incident_number"=>40, "status"=>"acknowledged",
- # "last_status_change_by"=>{"id"=>"PO1NWPS", "name"=>"Jesse Reynolds",
- # "email"=>"jesse@bulletproof.net",
- # "html_url"=>"http://bltprf.pagerduty.com/users/PO1NWPS"}}], "limit"=>100, "offset"=>0,
- # "total"=>1}
begin
response = Yajl::Parser.parse(http.response)
rescue Yajl::ParseError
@logger.error("failed to parse json from a post to #{url} ... response headers and body follows...")
return nil