bin/check-aggregate.rb in sensu-plugins-sensu-0.1.0 vs bin/check-aggregate.rb in sensu-plugins-sensu-1.0.0
- old
+ new
@@ -19,12 +19,24 @@
class CheckAggregate < Sensu::Plugin::Check::CLI
# Base URL of the Sensu API. The default is chosen when the option is declared:
# ENV['SENSU_API'] with ':4567' appended, else ENV['SENSU_API_URL'], else localhost.
option :api,
short: '-a URL',
long: '--api URL',
description: 'Sensu API URL',
- default: 'http://localhost:4567'
+ default: if ENV['SENSU_API']
+ ENV['SENSU_API'] + ':4567'
+ elsif ENV['SENSU_API_URL']
+ ENV['SENSU_API_URL']
+ else
+ 'http://localhost:4567'
+ end
# When set, api_request passes OpenSSL::SSL::VERIFY_NONE as verify_ssl
# instead of VERIFY_PEER. Only a short flag is declared for this option.
+ option :insecure,
+ short: '-k',
+ boolean: true,
+ description: 'Enabling insecure connections',
+ default: false
+
# User name handed to RestClient::Resource by api_request.
option :user,
short: '-u USER',
long: '--user USER',
description: 'Sensu API USER'
@@ -64,39 +76,64 @@
long: '--summarize',
boolean: true,
description: 'Summarize check result output',
default: false
# Enables collect_output in run, which gathers the output of every result
# whose status is not 0 into aggregate[:outputs].
+ option :collect_output,
+ short: '-o',
+ long: '--output',
+ boolean: true,
+ description: 'Collects all non-ok outputs',
+ default: false
+
# Percentage of non-ok results (see compare_thresholds) at which the check
# warns; the argument is converted with to_i.
option :warning,
short: '-W PERCENT',
long: '--warning PERCENT',
description: 'PERCENT non-ok before warning',
proc: proc(&:to_i)
# Absolute number of non-ok results (total minus ok, see
# compare_thresholds_count) at which the check warns.
+ option :warning_count,
+ long: '--warning_count INTEGER',
+ description: 'number of nodes in warning before warning',
+ proc: proc(&:to_i)
+
# Percentage of non-ok results at which the check goes critical; evaluated
# before the warning threshold.
option :critical,
short: '-C PERCENT',
long: '--critical PERCENT',
description: 'PERCENT non-ok before critical',
proc: proc(&:to_i)
# Absolute number of non-ok results (total minus ok) at which the check goes
# critical; evaluated before warning_count.
+ option :critical_count,
+ long: '--critical_count INTEGER',
+ description: 'number of node in critical before critical',
+ proc: proc(&:to_i)
+
# Regular expression source used by compare_pattern; run only applies it
# when --summarize is given as well.
option :pattern,
short: '-P PATTERN',
long: '--pattern PATTERN',
description: 'A PATTERN to detect outliers'
# When set, run passes the aggregate through honor_stash, which removes
# results whose client has a /stashes/silence/<client>/<check> entry.
+ option :honor_stash,
+ short: '-i',
+ long: '--honor-stash',
+ description: 'Checks that are stashed will be ignored from the aggregate',
+ boolean: true,
+ default: false
+
# Replaces the built-in alert text in the compare_* methods.
option :message,
short: '-M MESSAGE',
long: '--message MESSAGE',
description: 'A custom error MESSAGE'
def api_request(resource)
+ verify_mode = OpenSSL::SSL::VERIFY_PEER
+ verify_mode = OpenSSL::SSL::VERIFY_NONE if config[:insecure]
request = RestClient::Resource.new(config[:api] + resource, timeout: config[:timeout],
user: config[:user],
- password: config[:password])
+ password: config[:password],
+ verify_ssl: verify_mode)
JSON.parse(request.get, symbolize_names: true)
- rescue RestClient::ResourceNotFound
- warning "Resource not found: #{resource}"
rescue Errno::ECONNREFUSED
warning 'Connection refused'
rescue RestClient::RequestFailed
warning 'Request failed'
rescue RestClient::RequestTimeout
@@ -105,19 +142,64 @@
warning 'Missing or incorrect Sensu API credentials'
rescue JSON::ParserError
warning 'Sensu API returned invalid JSON'
end
# Removes every result whose client has a silence stash for the configured
# check, keeping the per-status counters and the total in step.
#
# aggregate - Hash with :results (Array of result Hashes carrying :client and
#             :status) and the :ok, :warning, :critical, :unknown and :total
#             counters.
#
# Returns the same aggregate Hash, modified in place.
def honor_stash(aggregate)
  aggregate[:results].delete_if do |result|
    begin
      # The lookup returning normally is treated as "stash exists"; when it
      # raises RestClient::ResourceNotFound the result is kept untouched.
      api_request("/stashes/silence/#{result[:client]}/#{config[:check]}")
      counter = case result[:status]
                when 0 then :ok
                when 1 then :warning
                when 2 then :critical
                else :unknown
                end
      aggregate[counter] = aggregate[counter] - 1
      aggregate[:total] = aggregate[:total] - 1
      true
    rescue RestClient::ResourceNotFound
      false
    end
  end
  aggregate
end
+
# Gathers the output of every non-ok result (status other than 0) into one
# newline-terminated string and stores it as the single element of
# aggregate[:outputs].
#
# aggregate - Hash with a :results Array of Hashes carrying :status and :output.
#
# Returns the aggregate Hash itself. The caller in run reassigns its local
# aggregate from this method, so returning the value of the :outputs
# assignment (an Array) would make every later aggregate[:ok] /
# aggregate[:total] lookup fail.
#
# NOTE(review): the threshold comparisons only print outputs whose paired
# count equals 1; a one-element Array carries no count, so the collected text
# may never reach the alert message - confirm the intended shape of :outputs.
def collect_output(aggregate)
  non_ok = aggregate[:results].reject { |entry| entry[:status] == 0 }
  # Interpolation tolerates a result without :output instead of raising on nil.
  collected = non_ok.map { |entry| "#{entry[:output]}\n" }.join
  aggregate[:outputs] = [collected]
  aggregate
end
+
# Fetches the aggregate through the API flavour matching the server version
# reported by /info: version 0.24 or newer goes through
# named_aggregate_results, anything older through aggregate_results.
#
# The version is compared numerically on (major, minor). Comparing the minor
# component as a String misroutes versions such as 0.9.x ('9' >= '24') and
# 1.0.x (minor '0').
def acquire_aggregate
  version = api_request('/info')[:sensu][:version]
  major, minor = version.split('.').map(&:to_i)
  if ([major.to_i, minor.to_i] <=> [0, 24]) >= 0
    named_aggregate_results
  else
    aggregate_results
  end
end
+
# Requests the named aggregate for the configured check, limited to
# config[:age] via max_age, and returns the :results part of the response.
# Issues a warning when the ok, warning, critical and unknown counters are
# all zero.
def named_aggregate_results
  path = "/aggregates/#{config[:check]}?max_age=#{config[:age]}"
  counts = api_request(path)[:results]
  nothing_reported = %w(ok warning critical unknown).none? { |state| counts[state.to_sym] != 0 }
  warning "No aggregates found in last #{config[:age]} seconds" if nothing_reported
  counts
end
+
+ def aggregate_results
uri = "/aggregates/#{config[:check]}"
issued = api_request(uri + "?age=#{config[:age]}" + (config[:limit] ? "&limit=#{config[:limit]}" : ''))
unless issued.empty?
issued_sorted = issued.sort
time = issued_sorted.pop
unless time.nil?
- uri += "/#{time}"
- uri += '?summarize=output' if config[:summarize]
+ uri += "/#{time}?"
+ uri += '&summarize=output' if config[:summarize]
+ uri += '&results=true' if config[:honor_stash] || config[:collect_output]
api_request(uri)
else
warning "No aggregates older than #{config[:age]} seconds"
end
else
@@ -125,42 +207,77 @@
end
end
# Compares the percentage of non-ok results with the configured percentage
# thresholds and raises critical (checked first) or warning accordingly.
#
# aggregate - Hash with :ok and :total counters and, optionally, :outputs
#             (output/count pairs). When :outputs is present the alert message
#             is built from the outputs whose count is 1; otherwise from
#             config[:message] or the default text plus the percentage.
def compare_thresholds(aggregate)
  total = aggregate[:total].to_f
  # An aggregate without results has nothing non-ok. Guarding here also avoids
  # 0.0 / 0.0 => NaN, whose #to_i raises FloatDomainError.
  percent_non_zero =
    if total.zero?
      0
    else
      (100 - (aggregate[:ok].to_f / total) * 100).to_i
    end

  message =
    if aggregate[:outputs]
      aggregate[:outputs].map { |output, count| "\n#{output}" if count == 1 }.compact.join
    else
      (config[:message] || 'Number of non-zero results exceeds threshold') +
        " (#{percent_non_zero}% non-zero)"
    end

  if config[:critical] && percent_non_zero >= config[:critical]
    critical message
  elsif config[:warning] && percent_non_zero >= config[:warning]
    warning message
  end
end
# Looks for outliers among the aggregate's outputs using config[:pattern].
#
# For every output that matches, the first capture group is the grouping key
# and the remaining capture groups are the value. A critical is raised as soon
# as a key shows up again with a different value than the one last recorded.
#
# aggregate - Hash whose :outputs yields output/count pairs.
def compare_pattern(aggregate)
  matcher = Regexp.new(config[:pattern])
  seen = {}
  outlier_message = config[:message] || 'One of these is not like the others!'
  aggregate[:outputs].each do |output, _count|
    match = matcher.match(output.to_s)
    next if match.nil?

    key = match[1]
    captures = match[2..-1]
    critical outlier_message + " (#{key})" if seen.key?(key) && seen[key] != captures
    seen[key] = captures
  end
end
# Compares the absolute number of non-ok results (total minus ok) with the
# configured count thresholds and raises critical (checked first) or warning.
#
# aggregate - Hash with :ok and :total counters and, optionally, :outputs
#             (output/count pairs). When :outputs is present the alert message
#             consists of the outputs whose count is 1; otherwise it is
#             config[:message] or the default text plus the node counts.
def compare_thresholds_count(aggregate)
  down = aggregate[:total].to_i - aggregate[:ok].to_i

  message =
    if aggregate[:outputs]
      aggregate[:outputs].each_with_object('') do |(output, count), text|
        text << "\n" + output.to_s if count == 1
      end
    else
      (config[:message] || 'Number of nodes down exceeds threshold') +
        " (#{down} out of #{aggregate[:total]} nodes reporting not ok)"
    end

  critical_limit = config[:critical_count]
  return critical(message) if critical_limit && down >= critical_limit

  warning message if config[:warning_count] && down >= config[:warning_count]
end
+
# Entry point of the check: validates that at least one comparison is
# configured, fetches the aggregate, optionally filters stashed results and
# collects outputs, runs the configured comparisons and reports ok when none
# of them raised an alert.
def run
  threshold = config[:critical] || config[:warning]
  threshold_count = config[:critical_count] || config[:warning_count]
  pattern = config[:summarize] && config[:pattern]
  unless threshold || pattern || threshold_count
    critical 'Misconfiguration: critical || warning || (summarize && pattern) must be set'
  end

  aggregate = acquire_aggregate
  aggregate = honor_stash(aggregate) if config[:honor_stash]
  # collect_output stores :outputs on the aggregate in place; rely on that
  # mutation rather than on its return value so the aggregate stays a Hash.
  collect_output(aggregate) if config[:collect_output]
  # No debug dump of the aggregate here: anything printed before the status
  # line ends up in the check output.
  compare_thresholds(aggregate) if threshold
  compare_pattern(aggregate) if pattern
  compare_thresholds_count(aggregate) if threshold_count

  ok 'Aggregate looks GOOD'
end
end