bin/riemann-riak in riemann-tools-0.0.9 vs bin/riemann-riak in riemann-tools-0.1.0

- old
+ new

@@ -13,29 +13,49 @@ opt :riak_host, "Riak host", :default => Socket.gethostname opt :data_dir, "Riak data directory", :default => '/var/lib/riak' opt :stats_port, "Riak HTTP port for stats", :default => 8098 opt :stats_path, "Riak HTTP stats path", :default => '/stats' opt :node_name, "Riak erlang node name", :default => "riak@#{Socket.gethostname}" - + opt :get_50_warning, "FSM 50% get time warning threshold (ms)", :default => 1000 opt :put_50_warning, "FSM 50% put time warning threshold (ms)", :default => 1000 opt :get_95_warning, "FSM 95% get time warning threshold (ms)", :default => 2000 opt :put_95_warning, "FSM 95% put time warning threshold (ms)", :default => 2000 opt :get_99_warning, "FSM 99% get time warning threshold (ms)", :default => 10000 opt :put_99_warning, "FSM 99% put time warning threshold (ms)", :default => 10000 def initialize + @escript = true + @riakadmin = true + @httpstats = true + if `which escript` =~ /^\s*$/ - puts "No escript; disabling ring/key checks." - @no_escript = true + @escript = false end + + if `which riak-admin` =~ /^\s*$/ + @riakadmin = false + end + + if + begin + Net::HTTP.start(opts[:riak_host], opts[:stats_port]) do |http| + http.get opts[:stats_path] + end + rescue => e + @httpstatus = false + end + end end def check_ring - return if @no_escript + if @escript + str = `#{File.expand_path(File.dirname(__FILE__))}/riemann-riak-ring #{opts[:node_name]}`.chomp + elsif @riakadmin + str = `riak-admin ringready` + end - str = `#{File.expand_path(File.dirname(__FILE__))}/riemann-riak-ring #{opts[:node_name]}`.chomp if str =~ /^TRUE/ report( :host => opts[:riak_host], :service => 'riak ring', :state => 'ok', @@ -50,12 +70,10 @@ ) end end def check_keys - return if @no_escript - keys = `#{File.expand_path(File.dirname(__FILE__))}/riemann-riak-keys #{opts[:node_name]}`.chomp if keys =~ /^\d+$/ report( :host => opts[:riak_host], :service => 'riak keys', @@ -64,11 +82,11 @@ :description => keys ) else report( :host => opts[:riak_host], - :servie => 'riak keys', + :service => 'riak keys', :state => 'unknown', :description => keys ) end end @@ -101,62 +119,74 @@ 'critical' end end def check_stats - begin - res = Net::HTTP.start(opts[:riak_host], opts[:stats_port]) do |http| - http.get opts[:stats_path] + if @httpstatus + begin + res = Net::HTTP.start(opts[:riak_host], opts[:stats_port]) do |http| + http.get opts[:stats_path] + end + rescue => e + report( + :host => opts[:riak_host], + :service => 'riak', + :state => 'critical', + :description => "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}" + ) + return end - rescue => e - report( - :host => opts[:riak_host], - :service => 'riak', - :state => 'critical', - :description => "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}" - ) - return - end - if res.code.to_i == 200 - stats = JSON.parse(res.body) + if res.code.to_i == 200 + stats = JSON.parse(res.body) + else + report( + :host => opts[:riak_host], + :service => 'riak', + :state => 'critical', + :description => "stats returned HTTP #{res.code}:\n\n#{res.body}" + ) + return + end + elsif @riakadmin + stats = Hash[`riak-admin status`.split(/\n/).map{|i| i.split(/ : /)}] else - report( - :host => opts[:riak_host], - :service => 'riak', - :state => 'critical', - :description => "stats returned HTTP #{res.code}:\n\n#{res.body}" - ) - return + report( + :host => opts[:riak_host], + :service => 'riak', + :state => 'critical', + :description => "error fetching Riak stats" + ) + return end report( :host => opts[:riak_host], :service => 'riak', :state => 'ok' ) # Gets/puts/rr [ - 'vnode_gets', - 'vnode_puts', - 'node_gets', - 'node_puts', - 'read_repairs' + 'vnode_gets', + 'vnode_puts', + 'node_gets', + 'node_puts', + 'read_repairs' ].each do |s| report( :host => opts[:riak_host], :service => "riak #{s}", :state => 'ok', - :metric => stats[s]/60.0, - :description => "#{stats[s]/60.0}/sec" + :metric => stats[s].to_i/60.0, + :description => "#{stats[s].to_i/60.0}/sec" ) end # FSMs ['get', 'put'].each do |type| [50, 95, 99].each do |percentile| - val = stats[fsm_stat(type, percentile)] || 0 + val = stats[fsm_stat(type, percentile)].to_i || 0 val = 0 if val == 'undefined' val /= 1000.0 # Convert us to ms state = fsm_state(type, percentile, val) report( :host => opts[:riak_host],