bin/check-rds.rb in sensu-plugins-aws-3.2.1 vs bin/check-rds.rb in sensu-plugins-aws-4.0.0

- old
+ new

@@ -84,21 +84,26 @@
   option :db_instance_id,
          short: '-i N',
          long: '--db-instance-id NAME',
          description: 'DB instance identifier'

+  option :db_cluster_id,
+         short: '-l N',
+         long: '--db-cluster-id NAME',
+         description: 'DB cluster identifier'
+
   option :end_time,
          short: '-t T',
          long: '--end-time TIME',
          default: Time.now,
          proc: proc { |a| Time.parse a },
          description: 'CloudWatch metric statistics end time'

   option :period,
          short: '-p N',
          long: '--period SECONDS',
-         default: 60,
+         default: 180,
          proc: proc(&:to_i),
          description: 'CloudWatch metric statistics period'

   option :statistics,
          short: '-S N',
@@ -152,10 +157,20 @@
     db = rds.describe_db_instances.db_instances.detect { |db_instance| db_instance.db_instance_identifier == id }
     unknown 'DB instance not found.' if db.nil?
     db
   end

+  # Returns the DB instance identifier of the writer member of cluster +id+.
+  # Reports UNKNOWN when the cluster, or a writer inside it, cannot be found
+  # (relies on `unknown` ending the check, as find_db_instance already does).
+  def find_db_cluster_writer(id)
+    cluster = rds.describe_db_clusters(db_cluster_identifier: id).db_clusters.first
+    unknown 'DB cluster not found.' if cluster.nil?
+    writer = cluster.db_cluster_members.detect(&:is_cluster_writer)
+    unknown 'DB cluster writer not found.' if writer.nil?
+    writer.db_instance_identifier
+  end
+
   def cloud_watch_metric(metric_name, unit)
     cloud_watch.get_metric_statistics(
       namespace: 'AWS/RDS',
       metric_name: metric_name,
       dimensions: [
@@ -181,15 +197,10 @@
     else
       values.last[config[:statistics]]
     end
   end

-  def flag_alert(severity, message)
-    @severities[severity] = true
-    @message += message
-  end
-
   def memory_total_bytes(instance_class)
     memory_total_gigabytes = {
       'db.cr1.8xlarge' => 244.0,
       'db.m1.small' => 1.7,
       'db.m1.medium' => 3.75,
@@ -222,86 +233,132 @@
     memory_total_gigabytes.fetch(instance_class) * 1024**3
   end

+  # Each check_* helper below marks +severity+ in @severities and returns an
+  # alert fragment when its threshold is breached; it returns nil otherwise.
   def check_az(severity, expected_az)
     return if @db_instance.availability_zone == expected_az
-    flag_alert severity, "; AZ is #{@db_instance.availability_zone} (expected #{expected_az})"
+    @severities[severity] = true
+    "; AZ is #{@db_instance.availability_zone} (expected #{expected_az})"
   end

   def check_cpu(severity, expected_lower_than)
-    @cpu_metric ||= cloud_watch_metric 'CPUUtilization', 'Percent'
-    @cpu_metric_value ||= latest_value @cpu_metric
-    return if @cpu_metric_value < expected_lower_than
-    flag_alert severity, "; CPUUtilization is #{sprintf '%.2f', @cpu_metric_value}% (expected lower than #{expected_lower_than}%)"
+    cpu_metric = cloud_watch_metric 'CPUUtilization', 'Percent'
+    cpu_metric_value = latest_value cpu_metric
+    return if cpu_metric_value < expected_lower_than
+    @severities[severity] = true
+    "; CPUUtilization is #{sprintf '%.2f', cpu_metric_value}% (expected lower than #{expected_lower_than}%)"
   end

   def check_memory(severity, expected_lower_than)
-    @memory_metric ||= cloud_watch_metric 'FreeableMemory', 'Bytes'
-    @memory_metric_value ||= latest_value @memory_metric
-    @memory_total_bytes ||= memory_total_bytes @db_instance.db_instance_class
-    @memory_usage_bytes ||= @memory_total_bytes - @memory_metric_value
-    @memory_usage_percentage ||= @memory_usage_bytes / @memory_total_bytes * 100
-    return if @memory_usage_percentage < expected_lower_than
-    flag_alert severity, "; Memory usage is #{sprintf '%.2f', @memory_usage_percentage}% (expected lower than #{expected_lower_than}%)"
+    memory_metric = cloud_watch_metric 'FreeableMemory', 'Bytes'
+    memory_metric_value = latest_value memory_metric
+    total_bytes = memory_total_bytes @db_instance.db_instance_class
+    memory_usage_bytes = total_bytes - memory_metric_value
+    memory_usage_percentage = memory_usage_bytes / total_bytes * 100
+    return if memory_usage_percentage < expected_lower_than
+    @severities[severity] = true
+    "; Memory usage is #{sprintf '%.2f', memory_usage_percentage}% (expected lower than #{expected_lower_than}%)"
   end

   def check_disk(severity, expected_lower_than)
-    @disk_metric ||= cloud_watch_metric 'FreeStorageSpace', 'Bytes'
-    @disk_metric_value ||= latest_value @disk_metric
-    @disk_total_bytes ||= @db_instance.allocated_storage * 1024**3
-    @disk_usage_bytes ||= @disk_total_bytes - @disk_metric_value
-    @disk_usage_percentage ||= @disk_usage_bytes / @disk_total_bytes * 100
-    return if @disk_usage_percentage < expected_lower_than
-    flag_alert severity, "; Disk usage is #{sprintf '%.2f', @disk_usage_percentage}% (expected lower than #{expected_lower_than}%)"
+    disk_metric = cloud_watch_metric 'FreeStorageSpace', 'Bytes'
+    disk_metric_value = latest_value disk_metric
+    disk_total_bytes = @db_instance.allocated_storage * 1024**3
+    disk_usage_bytes = disk_total_bytes - disk_metric_value
+    disk_usage_percentage = disk_usage_bytes / disk_total_bytes * 100
+    return if disk_usage_percentage < expected_lower_than
+    @severities[severity] = true
+    "; Disk usage is #{sprintf '%.2f', disk_usage_percentage}% (expected lower than #{expected_lower_than}%)"
   end

   def check_connections(severity, expected_lower_than)
-    @connections_metric ||= cloud_watch_metric 'DatabaseConnections', 'Count'
-    @connections_metric_value ||= latest_value @connections_metric
-    return if @connections_metric_value < expected_lower_than
-    flag_alert severity, "; DatabaseConnections are #{sprintf '%d', @connections_metric_value} (expected lower than #{expected_lower_than})"
+    connections_metric = cloud_watch_metric 'DatabaseConnections', 'Count'
+    connections_metric_value = latest_value connections_metric
+    return if connections_metric_value < expected_lower_than
+    @severities[severity] = true
+    "; DatabaseConnections are #{sprintf '%d', connections_metric_value} (expected lower than #{expected_lower_than})"
   end

   def check_iops(severity, expected_lower_than)
-    @read_iops_metric ||= cloud_watch_metric 'ReadIOPS', 'Count/Second'
-    @read_iops_metric_value ||= latest_value @read_iops_metric
-    @write_iops_metric ||= cloud_watch_metric 'WriteIOPS', 'Count/Second'
-    @write_iops_metric_value ||= latest_value @write_iops_metric
-    @iops_metric_value ||= @read_iops_metric_value + @write_iops_metric_value
-    return if @iops_metric_value < expected_lower_than
-    flag_alert severity, "; IOPS are #{sprintf '%d', @iops_metric_value} (expected lower than #{expected_lower_than})"
+    read_iops_metric = cloud_watch_metric 'ReadIOPS', 'Count/Second'
+    read_iops_metric_value = latest_value read_iops_metric
+    write_iops_metric = cloud_watch_metric 'WriteIOPS', 'Count/Second'
+    write_iops_metric_value = latest_value write_iops_metric
+    iops_metric_value = read_iops_metric_value + write_iops_metric_value
+    return if iops_metric_value < expected_lower_than
+    @severities[severity] = true
+    "; IOPS are #{sprintf '%d', iops_metric_value} (expected lower than #{expected_lower_than})"
   end

+  # Builds the list of instances to check (the cluster writer and/or the
+  # requested instance, or every instance when neither option is given), runs
+  # the checks on each one and reports the worst severity found.
   def run
+    instances = []
+    if config[:db_cluster_id]
+      db_cluster_writer_id = find_db_cluster_writer(config[:db_cluster_id])
+      instances << find_db_instance(db_cluster_writer_id)
+    end
+
     if config[:db_instance_id].nil? || config[:db_instance_id].empty?
-      unknown 'No DB instance provided. See help for usage details'
+      # Fall back to every instance only when no cluster writer was selected above.
+      instances.concat(rds.describe_db_instances[:db_instances]) if instances.empty?
+    else
+      instances << find_db_instance(config[:db_instance_id])
     end

-    @db_instance = find_db_instance config[:db_instance_id]
-    @message = "#{config[:db_instance_id]}: "
-    @severities = {
+    messages = ''
+    severities = {
       critical: false,
       warning: false
     }

+    instances.each do |instance|
+      # The check_* helpers read the instance under test from @db_instance.
+      @db_instance = instance
+      message, instance_severities = collect(instance)
+      if instance_severities[:critical]
+        messages += message
+        severities[:critical] = true
+      elsif instance_severities[:warning]
+        messages += message
+        severities[:warning] = true
+      end
+    end
+
+    if severities[:critical]
+      critical messages
+    elsif severities[:warning]
+      warning messages
+    else
+      ok messages
+    end
+  end
+
+  # Runs the configured checks against +instance+ and returns
+  # [message, severities] describing that instance alone.
+  def collect(instance)
+    message = "\n#{instance[:db_instance_identifier]}: "
+    @severities = {
+      critical: false,
+      warning: false
+    }
+
     @severities.keys.each do |severity|
-      check_az severity, config[:"availability_zone_#{severity}"] if config[:"availability_zone_#{severity}"]
+      # check_az takes (severity, expected_az) and returns nil when the zone matches.
+      az_message = check_az severity, config[:"availability_zone_#{severity}"] if config[:"availability_zone_#{severity}"]
+      message += az_message unless az_message.nil?

       %w(cpu memory disk connections iops).each do |item|
-        send "check_#{item}", severity, config[:"#{item}_#{severity}_over"] if config[:"#{item}_#{severity}_over"]
+        result = send "check_#{item}", severity, config[:"#{item}_#{severity}_over"] if config[:"#{item}_#{severity}_over"]
+        message += result unless result.nil?
       end
     end

     if %w(cpu memory disk connections iops).any? { |item| %w(warning critical).any? { |severity| config[:"#{item}_#{severity}_over"] } }
-      @message += "(#{config[:statistics].to_s.capitalize} within #{config[:period]}s "
-      @message += "between #{config[:end_time] - config[:period]} to #{config[:end_time]})"
+      message += "(#{config[:statistics].to_s.capitalize} within #{config[:period]}s "
+      message += "between #{config[:end_time] - config[:period]} to #{config[:end_time]})"
     end
-
-    if @severities[:critical]
-      critical @message
-    elsif @severities[:warning]
-      warning @message
-    else
-      ok @message
-    end
+    [message, @severities]
   end
 end