bin/riemann-riak in riemann-tools-dgvz-0.2.2.1 vs bin/riemann-riak in riemann-tools-dgvz-0.2.2.2
- old
+ new
@@ -25,12 +25,12 @@
opt :get_99_warning, "FSM 99% get time warning threshold (ms)", :default => 10000
opt :put_99_warning, "FSM 99% put time warning threshold (ms)", :default => 10000
def initialize
detect_features
-
- @httpstatus = true
+
+ @httpstatus = true
# What's going on here? --aphyr
if
begin
uri = URI.parse(opts[:riak_host])
if uri.host == nil
@@ -57,11 +57,11 @@
# Identifies whether escript and riak-admin are installed
def detect_features
@escript = true # Whether escript is present on this machine
@riakadmin = true # Whether riak-admin is present
-
+
if `which escript` =~ /^\s*$/
@escript = false
end
if `which riak-admin` =~ /^\s*$/
@@ -111,10 +111,38 @@
:description => keys
)
end
end
+ def check_transfers
+ str = if @riakadmin
+ `riak-admin transfers`
+ else
+ nil
+ end
+
+ return if str.nil?
+
+ if str =~ /'#{opts[:node_name]}' waiting to handoff (\d+) partitions/
+ report(
+ :host => opts[:riak_host],
+ :service => 'riak transfers',
+ :state => 'critical',
+ :metric => $1.to_i,
+ :description => "waiting to handoff #{$1} partitions"
+ )
+ else
+ report(
+ :host => opts[:riak_host],
+ :service => 'riak transfers',
+ :state => 'ok',
+ :metric => 0,
+ :description => "No pending transfers"
+ )
+ end
+ end
+
def check_disk
gb = `du -Ls #{opts[:data_dir]}`.split(/\s+/).first.to_i / (1024.0**2)
report(
:host => opts[:riak_host],
:service => 'riak disk',
@@ -292,9 +320,10 @@
# This can utterly destroy a cluster, so we disable
# check_keys
check_stats
check_ring
check_disk
+ check_transfers
end
end
# Top-level invocation: calls `run` on Riemann::Tools::Riak when this script
# is executed (`run` itself is defined outside this excerpt).
Riemann::Tools::Riak.run