bin/check_check in ruby-nagios-0.2.0 vs bin/check_check in ruby-nagios-0.2.2
- old
+ new
@@ -76,11 +76,11 @@
# TODO(sissel): add a proper 'status' model that
# has HostStatus, ServiceStatus, etc.
end # class Nagios::Status::Model
-Settings = Struct.new(:nagios_cfg, :status_path, :service_pattern, :host_pattern, :down_min_percent)
+Settings = Struct.new(:nagios_cfg, :status_path, :service_pattern, :host_pattern, :percent_critical, :percent_warning, :percent_unknown, :show_ok)
def main(args)
progname = File.basename($0)
settings = Settings.new
settings.nagios_cfg = "/etc/nagios3/nagios.cfg" # debian/ubuntu default
@@ -100,14 +100,28 @@
opts.on("-h REGEX", "--host REGEX",
"Aggregate only services from hosts matching the given pattern") do |val|
settings.host_pattern = val
end
- opts.on("-p NUM", "--percent NUM",
- "Only alert if this percentage of the cluster is down") do |val|
- settings.down_min_percent = Float(val)
+ opts.on( "--percent-warning NUM",
+ "Only alert if this percentage of the cluster is in warning state") do |val|
+ settings.percent_warning = Float(val)
end
+
+ opts.on( "--percent-critical NUM",
+ "Only alert if this percentage of the cluster is in critical state") do |val|
+ settings.percent_critical = Float(val)
+ end
+
+ opts.on( "--percent-unknown NUM",
+ "Only alert if this percentage of the cluster is in unknown state") do |val|
+ settings.percent_unknown = Float(val)
+ end
+
+ opts.on( "--show-ok", "Show details for checks in OK state too") do
+ settings.show_ok = true
+ end
end # OptionParser.new
opts.parse!(args)
# hacky parsing, for now
@@ -146,47 +160,53 @@
end
print "services=/#{settings.service_pattern}/ "
print "hosts=/#{settings.host_pattern}/ "
puts
+ total_results = ["OK", "WARNING", "CRITICAL", "UNKNOWN"].inject(0) {|aggr,state| aggr += results[state].length}
+
# More data output
- total_results = 0.0
["WARNING", "CRITICAL", "UNKNOWN"].each do |state|
if results[state] && results[state].size > 0
puts "Services in #{state}:"
results[state].sort { |a,b| a["host_name"] <=> b["host_name"] }.each do |service|
- total_results += 1
- puts " #{service["host_name"]} => #{service["service_description"]} (#{service["plugin_output"]})"
+ if service["long_plugin_output"] and !service["long_plugin_output"].empty?
+ puts " #{service["host_name"]} => #{service["service_description"]} (#{service["plugin_output"]})"
+ puts " #{service["long_plugin_output"]}"
+ else
+ puts " #{service["host_name"]} => #{service["service_description"]} (#{service["plugin_output"]})"
+ end
end
end # if results[state]
end # for each non-OK state
- # If everything is OK, still print detailed output for confirmation
- if total_results == 0 and results["OK"].size > 0
- puts "OK Services:"
- results["OK"].sort { |a,b| a["host_name"] <=> b["host_name"] }.each do |service|
- puts " #{service["host_name"]} => #{service["service_description"]}"
- end
+ if settings.show_ok and results["OK"].size > 0
+ puts "OK Services:"
+ results["OK"].sort { |a,b| a["host_name"] <=> b["host_name"] }.each do |service|
+ puts " #{service["host_name"]} => #{service["service_description"]}"
+ end
end
exitcode = 0
- if settings.down_min_percent
- if results["WARNING"].length > 0 && (results["WARNING"].length / total_results) * 100 >= settings.down_min_percent
- exitcode = 1
- end
- if results["CRITICAL"].length > 0 && (results["CRITICAL"].length / total_results) * 100 >= settings.down_min_percent
- exitcode = 2
- end
+ if settings.percent_unknown
+ exitcode = 3 if results["UNKNOWN"].length > 0 && (results["UNKNOWN"].length.to_f / total_results) * 100 >= settings.percent_unknown
else
- if results["WARNING"].length > 0
- exitcode = 1
- end
+ exitcode = 3 if results["UNKNOWN"].length > 0
+ end
- if results["CRITICAL"].length > 0
- exitcode = 2
- end
+ if settings.percent_warning
+ exitcode = 1 if results["WARNING"].length > 0 && ((results["WARNING"].length.to_f + results["CRITICAL"].length.to_f) / total_results) * 100 >= settings.percent_warning
+ else
+ exitcode = 1 if results["WARNING"].length > 0
end
+
+ if settings.percent_critical
+ exitcode = 2 if results["CRITICAL"].length > 0 && (results["CRITICAL"].length.to_f / total_results) * 100 >= settings.percent_critical
+ else
+ exitcode = 2 if results["CRITICAL"].length > 0
+ end
+
return exitcode
end
exit(main(ARGV))