bin/riemann-riak in riemann-tools-0.0.9 vs bin/riemann-riak in riemann-tools-0.1.0
- old
+ new
@@ -13,29 +13,49 @@
opt :riak_host, "Riak host", :default => Socket.gethostname
opt :data_dir, "Riak data directory", :default => '/var/lib/riak'
opt :stats_port, "Riak HTTP port for stats", :default => 8098
opt :stats_path, "Riak HTTP stats path", :default => '/stats'
opt :node_name, "Riak erlang node name", :default => "riak@#{Socket.gethostname}"
-
+
opt :get_50_warning, "FSM 50% get time warning threshold (ms)", :default => 1000
opt :put_50_warning, "FSM 50% put time warning threshold (ms)", :default => 1000
opt :get_95_warning, "FSM 95% get time warning threshold (ms)", :default => 2000
opt :put_95_warning, "FSM 95% put time warning threshold (ms)", :default => 2000
opt :get_99_warning, "FSM 99% get time warning threshold (ms)", :default => 10000
opt :put_99_warning, "FSM 99% put time warning threshold (ms)", :default => 10000
+ # Probes the local environment once at startup and records which data
+ # sources the checks may use:
+ #   @escript    - `which escript` printed a path (the ring helper script can run)
+ #   @riakadmin  - `which riak-admin` printed a path (fallback for ring and stats)
+ #   @httpstatus - a GET of opts[:stats_path] on riak_host:stats_port raised no error
def initialize
+ @escript = true
+ @riakadmin = true
+ # This flag must be named @httpstatus: that is the variable the rescue
+ # below clears and the one check_stats tests before using the HTTP endpoint.
+ # Initialising a differently named variable leaves @httpstatus nil, which
+ # silently disables the HTTP stats path.
+ @httpstatus = true
+
if `which escript` =~ /^\s*$/
- puts "No escript; disabling ring/key checks."
- @no_escript = true
+ @escript = false
end
+
+ if `which riak-admin` =~ /^\s*$/
+ @riakadmin = false
+ end
+
+ # Only reachability is probed here; the response itself is discarded.
+ # Any StandardError while connecting or fetching marks HTTP stats unusable.
+ begin
+ Net::HTTP.start(opts[:riak_host], opts[:stats_port]) do |http|
+ http.get opts[:stats_path]
+ end
+ rescue StandardError
+ @httpstatus = false
+ end
end
def check_ring
- return if @no_escript
+ if @escript
+ str = `#{File.expand_path(File.dirname(__FILE__))}/riemann-riak-ring #{opts[:node_name]}`.chomp
+ elsif @riakadmin
+ str = `riak-admin ringready`
+ end
- str = `#{File.expand_path(File.dirname(__FILE__))}/riemann-riak-ring #{opts[:node_name]}`.chomp
if str =~ /^TRUE/
report(
:host => opts[:riak_host],
:service => 'riak ring',
:state => 'ok',
@@ -50,12 +70,10 @@
)
end
end
def check_keys
- return if @no_escript
-
keys = `#{File.expand_path(File.dirname(__FILE__))}/riemann-riak-keys #{opts[:node_name]}`.chomp
if keys =~ /^\d+$/
report(
:host => opts[:riak_host],
:service => 'riak keys',
@@ -64,11 +82,11 @@
:description => keys
)
else
report(
:host => opts[:riak_host],
- :servie => 'riak keys',
+ :service => 'riak keys',
:state => 'unknown',
:description => keys
)
end
end
@@ -101,62 +119,74 @@
'critical'
end
end
def check_stats
- begin
- res = Net::HTTP.start(opts[:riak_host], opts[:stats_port]) do |http|
- http.get opts[:stats_path]
+ if @httpstatus
+ begin
+ res = Net::HTTP.start(opts[:riak_host], opts[:stats_port]) do |http|
+ http.get opts[:stats_path]
+ end
+ rescue => e
+ report(
+ :host => opts[:riak_host],
+ :service => 'riak',
+ :state => 'critical',
+ :description => "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}"
+ )
+ return
end
- rescue => e
- report(
- :host => opts[:riak_host],
- :service => 'riak',
- :state => 'critical',
- :description => "error fetching #{opts[:riak_host]}:#{opts[:stats_port]} #{e.class}, #{e.message}"
- )
- return
- end
- if res.code.to_i == 200
- stats = JSON.parse(res.body)
+ if res.code.to_i == 200
+ stats = JSON.parse(res.body)
+ else
+ report(
+ :host => opts[:riak_host],
+ :service => 'riak',
+ :state => 'critical',
+ :description => "stats returned HTTP #{res.code}:\n\n#{res.body}"
+ )
+ return
+ end
+ elsif @riakadmin
+ stats = Hash[`riak-admin status`.split(/\n/).map{|i| i.split(/ : /)}]
else
- report(
- :host => opts[:riak_host],
- :service => 'riak',
- :state => 'critical',
- :description => "stats returned HTTP #{res.code}:\n\n#{res.body}"
- )
- return
+ report(
+ :host => opts[:riak_host],
+ :service => 'riak',
+ :state => 'critical',
+ :description => "error fetching Riak stats"
+ )
+ return
end
report(
:host => opts[:riak_host],
:service => 'riak',
:state => 'ok'
)
# Gets/puts/rr
[
- 'vnode_gets',
- 'vnode_puts',
- 'node_gets',
- 'node_puts',
- 'read_repairs'
+ 'vnode_gets',
+ 'vnode_puts',
+ 'node_gets',
+ 'node_puts',
+ 'read_repairs'
].each do |s|
report(
:host => opts[:riak_host],
:service => "riak #{s}",
:state => 'ok',
- :metric => stats[s]/60.0,
- :description => "#{stats[s]/60.0}/sec"
+ :metric => stats[s].to_i/60.0,
+ :description => "#{stats[s].to_i/60.0}/sec"
)
end
# FSMs
['get', 'put'].each do |type|
[50, 95, 99].each do |percentile|
- val = stats[fsm_stat(type, percentile)] || 0
+ val = stats[fsm_stat(type, percentile)].to_i || 0
val = 0 if val == 'undefined'
val /= 1000.0 # Convert us to ms
state = fsm_state(type, percentile, val)
report(
:host => opts[:riak_host],