#!/usr/bin/env ruby

require 'eventmachine'
# the redis/synchrony gems need to be required in this particular order, see
# the redis-rb README for details
require 'hiredis'
require 'em-synchrony'
require 'em-synchrony/em-http'
require 'redis/connection/synchrony'
require 'redis'

require 'yajl/json_gem'

require 'flapjack/data/entity_check'
require 'flapjack/pikelet'

module Flapjack

  class Pagerduty

    include Flapjack::Pikelet

    # Prepares the gateway's instance state: opens the primary redis
    # connection pool, logs the pikelet configuration, and records the
    # PagerDuty events API endpoint together with the bookkeeping used by the
    # acknowledgement poller.
    def setup
      @redis = build_redis_connection_pool
      logger.debug("New Pagerduty pikelet with the following options: #{@config.inspect}")

      # no acknowledgement poll is in flight yet
      @pagerduty_acks_started     = nil
      @pagerduty_events_api_url   = 'https://events.pagerduty.com/generic/2010-04-15/create_event.json'
      # redis key used as the "an acknowledgement poll is running" flag
      @sem_pagerduty_acks_running = 'sem_pagerduty_acks_running'
    end

    # Posts a single event to the PagerDuty events API.
    #
    # event - Hash describing the event; it is JSON-encoded and sent as the
    #         request body.
    #
    # Returns a two-element Array of [HTTP status, decoded response body].
    # The decoded body is nil when the reply cannot be parsed as JSON, so a
    # malformed reply does not raise out of the caller (the notification loop
    # in #main ignores the return value).
    def send_pagerduty_event(event)
      options = { :body => Yajl::Encoder.encode(event) }
      http    = EM::HttpRequest.new(@pagerduty_events_api_url).post(options)
      status  = http.response_header.status

      begin
        response = Yajl::Parser.parse(http.response)
      rescue Yajl::ParseError
        # same handling as pagerduty_acknowledged?: log what came back and
        # carry on with an empty result instead of propagating the error
        logger.error("failed to parse json from a post to #{@pagerduty_events_api_url} ... response headers and body follows...")
        logger.error(http.response_header.inspect)
        logger.error(http.response)
        response = nil
      end

      logger.debug "send_pagerduty_event got a return code of #{status.to_s} - #{response.inspect}"
      [status, response]
    end

    # Checks that the PagerDuty events API is reachable by sending it a "nop"
    # event with a dummy service key.
    #
    # Returns true when the API answers with HTTP 200 and a body whose
    # 'status' matches /success/i; otherwise logs the reply and returns false.
    def test_pagerduty_connection
      noop = { "service_key"  => "11111111111111111111111111111111",
               "incident_key" => "Flapjack is running a NOOP",
               "event_type"   => "nop",
               "description"  => "I love APIs with noops." }
      code, results = send_pagerduty_event(noop)

      # the decoded body can be nil (or some non-Hash JSON value); treat that
      # as a failed check rather than blowing up on results['status']
      return true if code == 200 && results.is_a?(Hash) && results['status'] =~ /success/i

      logger.error "Error: test_pagerduty_connection: API returned #{code.to_s} #{results.inspect}"
      false
    end

    # this should be moved to a checks data model perhaps
    # Lists the failing checks that are not in unscheduled maintenance.
    #
    # Reads every member of the 'failed_checks' sorted set and drops those for
    # which a '<check>:unscheduled_maintenance' key exists.
    #
    # Returns an Array of check ids; the Array is empty when redis hands back
    # anything other than an Array (the problem is logged).
    def unacknowledged_failing_checks
      failing_checks = @redis_timer.zrange('failed_checks', '0', '-1')
      unless failing_checks.is_a?(Array)
        @logger.error("redis.zrange returned something other than an array! Here it is: " + failing_checks.inspect)
        # there is nothing sensible to iterate over
        return []
      end

      # NOTE(review): this relies on `exists` returning true/false. Newer
      # redis-rb releases reportedly return an Integer count (and 0 is truthy
      # in Ruby) -- confirm against the bundled gem version.
      ufc = failing_checks.reject do |check|
        @redis_timer.exists(check + ':unscheduled_maintenance')
      end

      @logger.debug "found unacknowledged failing checks as follows: #{ufc.join(', ')}"
      ufc
    end

    # Asks the PagerDuty incidents API whether an incident keyed on the given
    # check is currently in the 'acknowledged' state. The search window runs
    # from seven days ago to one day ahead of now.
    #
    # opts - Hash with String keys:
    #        'subdomain' - PagerDuty account subdomain, used to build the URL
    #        'username'  - sent with 'password' in the authorization header
    #        'password'
    #        'check'     - incident key to look up
    #
    # Returns a two-element Array:
    #   [true, { :pg_acknowledged_by => ... }] when at least one acknowledged
    #                                          incident is found
    #   [false, nil]                           when none is found
    #   [nil, nil]                             when the reply could not be
    #                                          parsed or has an unexpected shape
    def pagerduty_acknowledged?(opts)
      subdomain = opts['subdomain']
      username  = opts['username']
      password  = opts['password']
      check     = opts['check']

      url = 'https://' + subdomain + '.pagerduty.com/api/v1/incidents'

      now   = Time.new.utc
      query = { 'fields'       => 'incident_number,status,last_status_change_by',
                'since'        => (now - (60*60*24*7)).iso8601,
                'until'        => (now + (60*60*24)).iso8601,
                'incident_key' => check,
                'status'       => 'acknowledged' }

      options = { :head  => { 'authorization' => [username, password] },
                  :query => query }

      http = EM::HttpRequest.new(url).get(options)
      # DEBUG flapjack-pagerduty: pagerduty_acknowledged?: decoded response as:
      # {"incidents"=>[{"incident_number"=>40, "status"=>"acknowledged",
      # "last_status_change_by"=>{"id"=>"PO1NWPS", "name"=>"Jesse Reynolds",
      # "email"=>"jesse@bulletproof.net",
      # "html_url"=>"http://bltprf.pagerduty.com/users/PO1NWPS"}}], "limit"=>100, "offset"=>0,
      # "total"=>1}
      begin
        response = Yajl::Parser.parse(http.response)
      rescue Yajl::ParseError
        @logger.error("failed to parse json from a get to #{url} ... response headers and body follows...")
        @logger.error(http.response_header.inspect)
        @logger.error(http.response)
        return [nil, nil]
      end

      @logger.debug("pagerduty_acknowledged?: decoded response as: #{response.inspect}")

      # nil (nothing decoded) and non-Hash JSON values (arrays, strings, ...)
      # are equally unusable below
      unless response.is_a?(Hash)
        @logger.error('no valid response received from pagerduty!')
        return [nil, nil]
      end

      incidents = response['incidents']
      if incidents.nil?
        @logger.error('no incidents found in response')
        return [nil, nil]
      end

      return [false, nil] unless incidents.length > 0

      [true, { :pg_acknowledged_by => incidents.first['last_status_change_by'] }]
    end

    # Looks up every unacknowledged failing check in PagerDuty and, for each
    # one that has been acknowledged there, creates a matching flapjack
    # acknowledgement.
    #
    # Only one poll runs at a time: the method returns immediately when this
    # process started a poll less than 300 seconds ago, or when the redis
    # semaphore key is set to 'true'. Both guards are released when the poll
    # finishes, whether it succeeded or raised.
    #
    # Returns nil. Exceptions raised while polling propagate to the caller.
    def catch_pagerduty_acks

      # ensure we're the only instance of the pagerduty acknowledgement check running (with a naive
      # timeout of five minutes to guard against stale locks caused by crashing code) either in this
      # process or in other processes
      if (@pagerduty_acks_started && @pagerduty_acks_started > (Time.now.to_i - 300)) ||
          @redis_timer.get(@sem_pagerduty_acks_running) == 'true'
        logger.debug("skipping looking for acks in pagerduty as this is already happening")
        return
      end

      @pagerduty_acks_started = Time.now.to_i

      begin
        @redis_timer.set(@sem_pagerduty_acks_running, 'true')
        @redis_timer.expire(@sem_pagerduty_acks_running, 300)

        logger.debug("looking for acks in pagerduty for unack'd problems")

        # ok lets do it
        unacknowledged_failing_checks.each do |check|
          entity_check = Flapjack::Data::EntityCheck.for_event_id(check, { :redis => @redis_timer, :logger => @logger } )
          pagerduty_credentials = entity_check.pagerduty_credentials( { :redis => @redis_timer, :logger => @logger } )

          if pagerduty_credentials.length == 0
            @logger.debug("Found no pagerduty creditials for #{entity_check.entity_name}:#{entity_check.check}, moving on")
            next
          end

          # FIXME: try each set of credentials until one works (may have stale contacts turning up)
          options = pagerduty_credentials.first.merge('check' => check)

          pagerduty_acknowledged, result_hash = pagerduty_acknowledged?(options)
          unless pagerduty_acknowledged
            @logger.debug "#{check} is not acknowledged in pagerduty, moving on"
            next
          end

          pg_acknowledged_by = result_hash[:pg_acknowledged_by] unless result_hash.nil?
          @logger.debug "#{check} is acknowledged in pagerduty, creating flapjack acknowledgement ... "

          # name the acknowledging user in the summary when PagerDuty told us
          who_text = ""
          if !pg_acknowledged_by.nil? && !pg_acknowledged_by['name'].nil?
            who_text = " by #{pg_acknowledged_by['name']}"
          end
          entity_check.create_acknowledgement('summary' => "Acknowledged on PagerDuty" + who_text)
        end
      ensure
        # release both guards even when a lookup raised, so the next timer
        # tick can retry instead of waiting for the five minute timeout
        @redis_timer.del(@sem_pagerduty_acks_running)
        @pagerduty_acks_started = nil
      end

      nil
    end

    # Pushes a 'shutdown' notification onto this gateway's queue. The main
    # loop treats that notification type as a no-op, so its only effect is to
    # wake a blocked pop on the queue.
    #
    # opts - Hash; :redis is the connection to push with. Nothing happens when
    #        it is missing.
    #
    # Returns whatever rpush returns, or nil when no connection was given.
    def add_shutdown_event(opts = {})
      connection = opts[:redis]
      return unless connection

      queue_name = @config['queue']
      marker     = JSON.generate('notification_type' => 'shutdown')
      connection.rpush(queue_name, marker)
    end

    # Runs the gateway: sets up state, verifies the PagerDuty API is
    # reachable, starts a 10 second periodic timer that polls PagerDuty for
    # acknowledgements, then pops notification events off the configured
    # queue and forwards them to PagerDuty until should_quit? is true.
    #
    # 'problem', 'recovery' and 'acknowledgement' notifications are sent as
    # 'trigger', 'resolve' and 'acknowledge' events. 'shutdown' notifications
    # are ignored; notifications with any other (or no) type are logged and
    # skipped.
    #
    # Raises RuntimeError when the PagerDuty API connection test fails.
    def main
      setup

      logger.debug("pagerduty gateway - commencing main method")
      raise "Can't connect to the pagerduty API" unless test_pagerduty_connection

      # TODO: only clear this if there isn't another pagerduty gateway instance running
      # or better, include on instance ID in the semaphore key name
      @redis.del(@sem_pagerduty_acks_running)

      acknowledgement_timer = EM::Synchrony.add_periodic_timer(10) do
        @redis_timer ||= build_redis_connection_pool
        catch_pagerduty_acks
      end

      queue = @config['queue']

      begin
        until should_quit?
          logger.debug("pagerduty gateway is going into blpop mode on #{queue}")
          # blpop hands back [list name, value]
          payload = @redis.blpop(queue)[1]
          event   = Yajl::Parser.parse(payload)
          type    = event['notification_type']
          logger.debug("pagerduty notification event popped off the queue: " + event.inspect)

          # nothing to send for a shutdown marker; go round and re-check
          # should_quit?
          next if 'shutdown'.eql?(type)

          state = event['state']

          pagerduty_type, maint_str =
            case type.to_s.downcase
            when 'acknowledgement' then ['acknowledge', "has been acknowledged"]
            when 'problem'         then ['trigger',     "is #{state.upcase}"]
            when 'recovery'        then ['resolve',     "is #{state.upcase}"]
            end

          if pagerduty_type.nil?
            # without an event type there is nothing meaningful to send
            logger.error("pagerduty gateway - unhandled notification type #{type.inspect}, skipping event")
            next
          end

          event_id = event['event_id']
          # only the first ':' separates entity from check; the check name
          # may itself contain colons
          entity, check = event_id.split(':', 2)
          summary       = event['summary']
          address       = event['address']

          message = "#{type.upcase} - \"#{check}\" on #{entity} #{maint_str} - #{summary}"

          pagerduty_event = { :service_key  => address,
                              :incident_key => event_id,
                              :event_type   => pagerduty_type,
                              :description  => message }

          send_pagerduty_event(pagerduty_event)
        end
      ensure
        # stop polling for acknowledgements however the loop ended
        acknowledgement_timer.cancel
      end
    end

  end
end