#! /usr/bin/env ruby
#
# check-rds-events
#
#
# DESCRIPTION:
#   This plugin checks RDS instances for critical events.
#   Because RDS emits many event types, the check filters out
#   non-disruptive events that are part of basic operations
#   (backups, snapshot restores, replication resuming).
#
#   More info on RDS events:
#   http://docs.aws.amazon.com/AmazonRDS/latest/UserGuide/USER_Events.html
#
# OUTPUT:
#   plain-text
#
# PLATFORMS:
#   Linux
#
# DEPENDENCIES:
#   gem: aws-sdk
#   gem: sensu-plugin
#
# USAGE:
#   Checks a specific RDS instance in a specific region for critical events
#   check-rds-events.rb -r ${your_region} -k ${your_aws_secret_access_key} -a ${your_aws_access_key} -i ${your_rds_instance_id_name}
#
#   Checks all RDS instances in a specific region
#   check-rds-events.rb -r ${your_region} -k ${your_aws_secret_access_key} -a ${your_aws_access_key}
#
#   Checks all RDS instances in a specific region, should be using IAM role
#   check-rds-events.rb -r ${your_region}
#
#   Checks all RDS instances in every region of the 'aws' partition
#   check-rds-events.rb -r all
#
# NOTES:
#   Only the last event returned for each instance within the last
#   15 minutes is evaluated (presumably the most recent one -- confirm the
#   ordering guaranteed by the DescribeEvents API).
#
# LICENSE:
#   Tim Smith
#   Released under the same terms as Sensu (the MIT license); see LICENSE
#   for details.
#

require 'time' # Time#iso8601
require 'sensu-plugin/check/cli'
require 'aws-sdk'

# Sensu check that goes critical when an RDS instance has a recent event that
# is not part of the known, non-disruptive set.
class CheckRDSEvents < Sensu::Plugin::Check::CLI
  # How far back (in seconds) events are fetched: 15 minutes, so that only
  # persistent notifications keep alerting.
  EVENT_WINDOW_SECONDS = 900

  # Messages matching any of these patterns are treated as non-disruptive.
  # Add more patterns here to skip more events.
  IGNORED_EVENT_PATTERNS = [
    # ie. the regular backup operations
    /Backing up DB instance|Finished DB Instance backup|Restored from snapshot/,
    # ie. Replication resumed
    /Replication for the Read Replica resumed/
  ].freeze

  option :aws_access_key,
         short:       '-a AWS_ACCESS_KEY',
         long:        '--aws-access-key AWS_ACCESS_KEY',
         description: "AWS Access Key. Either set ENV['AWS_ACCESS_KEY'] or provide it as an option",
         default:     ENV['AWS_ACCESS_KEY']

  option :aws_secret_access_key,
         short:       '-k AWS_SECRET_KEY',
         long:        '--aws-secret-access-key AWS_SECRET_KEY',
         description: "AWS Secret Access Key. Either set ENV['AWS_SECRET_KEY'] or provide it as an option",
         default:     ENV['AWS_SECRET_KEY']

  option :aws_region,
         short:       '-r AWS_REGION',
         long:        '--aws-region REGION',
         description: 'AWS Region (defaults to us-east-1).',
         default:     'us-east-1'

  option :db_instance_id,
         short:       '-i N',
         long:        '--db-instance-id NAME',
         description: 'DB instance identifier'

  # Builds the client options from the command-line configuration.
  #
  # @return [Hash] a fresh hash on every call, so callers may mutate it
  def aws_config
    {
      access_key_id:     config[:aws_access_key],
      secret_access_key: config[:aws_secret_access_key],
      region:            config[:aws_region]
    }
  end

  # @return [Array<String>] names of every region in the 'aws' partition
  def rds_regions
    Aws.partition('aws').regions.map(&:name)
  end

  # Check entry point: OK when nothing was flagged, CRITICAL otherwise.
  def run
    clusters = maint_clusters
    if clusters.empty?
      ok
    else
      critical("Clusters w/ critical events: #{clusters.join(', ')}")
    end
  end

  # Collects one description per instance whose last returned event in the
  # look-back window is not in the ignore list.
  #
  # Exits UNKNOWN on any API error and CRITICAL on an invalid region.
  #
  # @return [Array<String>] entries shaped "<instance> (<region>) <message>"
  def maint_clusters
    flagged = []

    target_regions.each do |region|
      rds = Aws::RDS::Client.new aws_config.merge!(region: region)

      begin
        instance_identifiers(rds).each do |instance_id|
          event = latest_disruptive_event(rds, instance_id)
          flagged.push("#{instance_id} (#{region}) #{event[:message]}") if event
        end
      rescue StandardError => e
        unknown "An error occurred processing AWS RDS API (#{region}): #{e.message}"
      end
    end

    flagged
  end

  private

  # Resolves the configured region into the list of regions to query.
  # 'all' (case-insensitive) selects every known region.
  #
  # @return [Array<String>]
  def target_regions
    known = rds_regions
    return known if config[:aws_region].casecmp('all').zero?

    critical 'Invalid region specified!' unless known.include?(config[:aws_region])
    [config[:aws_region]]
  end

  # Lists the instance identifiers to inspect with the given client.
  #
  # @param rds [Aws::RDS::Client]
  # @return [Array<String>] the requested instance, or every instance visible
  #   to the client when no instance id was configured
  def instance_identifiers(rds)
    requested = config[:db_instance_id]

    if requested.nil? || requested.empty?
      # Walk every page: a single response only carries the first batch of
      # instances, which would silently skip the rest of a large account.
      identifiers = []
      rds.describe_db_instances.each_page do |page|
        identifiers.concat(page[:db_instances].map { |db| db[:db_instance_identifier] })
      end
      return identifiers
    end

    found = rds.describe_db_instances(db_instance_identifier: requested)
    unknown "#{requested} instance not found" if found.nil? || found.empty?
    [requested]
  end

  # Fetches the events of the look-back window for one instance and returns
  # the last one unless it is a known non-disruptive event.
  #
  # @param rds [Aws::RDS::Client]
  # @param instance_id [String]
  # @return [Object, nil] the event, or nil when there is nothing to report
  def latest_disruptive_event(rds, instance_id)
    events_record = rds.describe_events(
      start_time:        (Time.now.utc - EVENT_WINDOW_SECONDS).iso8601,
      source_type:       'db-instance',
      source_identifier: instance_id
    )
    return nil if events_record[:events].empty?

    event = events_record[:events][-1]
    return nil if IGNORED_EVENT_PATTERNS.any? { |pattern| event[:message] =~ pattern }

    event
  end
end