tools/riemann-rabbitmq/bin/riemann-rabbitmq in riemann-tools-1.0.0 vs tools/riemann-rabbitmq/bin/riemann-rabbitmq in riemann-tools-1.1.0
- old
+ new
@@ -1,269 +1,273 @@
#!/usr/bin/env ruby
-Process.setproctitle($0)
+# frozen_string_literal: true
+Process.setproctitle($PROGRAM_NAME)
+
require 'riemann/tools'
-class Riemann::Tools::Rabbitmq
- include Riemann::Tools
+module Riemann
+ module Tools
+ class Rabbitmq
+ include Riemann::Tools
- require 'faraday'
- require 'json'
- require 'uri'
+ require 'faraday'
+ require 'json'
+ require 'uri'
+ opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
+ opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
- opt :read_timeout, 'Faraday read timeout', type: :int, default: 2
- opt :open_timeout, 'Faraday open timeout', type: :int, default: 1
+ opt :monitor_user, 'RabbitMQ monitoring user', type: :string
+ opt :monitor_pass, 'RabbitMQ monitoring user password', type: :string
+ opt :monitor_port, 'RabbitMQ monitoring port', type: :int, default: 15_672
+ opt :monitor_host, 'RabbitMQ monitoring host', type: :string, default: 'localhost'
+ opt :monitor_use_tls, 'RabbitMQ use tls', type: :bool, default: false
- opt :monitor_user, 'RabbitMQ monitoring user', type: :string
- opt :monitor_pass, 'RabbitMQ monitoring user password', type: :string
- opt :monitor_port, 'RabbitMQ monitoring port', type: :int, default: 15672
- opt :monitor_host, 'RabbitMQ monitoring host', type: :string, default: "localhost"
- opt :monitor_use_tls, 'RabbitMQ use tls', type: :bool, default: false
+ opt :max_queue_size, 'max number of items in a queue that is acceptable', type: :int, default: 1_000_000
+ opt :ignore_max_size_queues, "A regular expression to match queues that shouldn't be size-checked", type: :string
- opt :max_queue_size, "max number of items in a queue that is acceptable", type: :int, default: 1_000_000
- opt :ignore_max_size_queues, "A regular expression to match queues that shouldn't be size-checked", type: :string
+ opt :node, 'Specify a node to monitor', type: :strings
- opt :node, "Specify a node to monitor", type: :strings
+ # Root URL of the RabbitMQ management API, built from the monitor_* options.
+ # The monitoring user and password are embedded in the URL's userinfo part.
+ def base_url
+ # Only a literal true switches to TLS; any other value keeps plain http.
+ scheme = options[:monitor_use_tls] == true ? 'https' : 'http'
+ "#{scheme}://#{options[:monitor_user]}:#{options[:monitor_pass]}@#{options[:monitor_host]}:#{options[:monitor_port]}/api"
+ end
- def base_url
- protocol = "http"
- if (options[:monitor_use_tls]) && (options[:monitor_use_tls]==true)
- protocol = "https"
- end
- "#{protocol}://#{options[:monitor_user]}:#{options[:monitor_pass]}@#{options[:monitor_host]}:#{options[:monitor_port]}/api"
- end
+ # URL of the management API's overview resource (base_url + "/overview").
+ def overview_url
+ format('%s/overview', base_url)
+ end
- def overview_url
- "#{base_url}/overview"
- end
+ # URL of the management API resource for a single node.
+ # +node+ is appended to the path as-is (stringified, not escaped).
+ def node_url(node)
+ format('%s/nodes/%s', base_url, node)
+ end
- def node_url(n)
- "#{base_url}/nodes/#{n}"
- end
+ # URL of the management API's queue listing (base_url + "/queues").
+ def queues_url
+ [base_url, 'queues'].join('/')
+ end
- def queues_url
- "#{base_url}/queues"
- end
+ # Host value attached to the events this tool reports:
+ # options[:event_host] when it is set, otherwise the monitored RabbitMQ
+ # host from options[:monitor_host].
+ def event_host
+ # The fallback must be looked up in the options hash; a bare
+ # :monitor_host symbol would be sent as the literal host name.
+ options[:event_host] || options[:monitor_host]
+ end
- def event_host
- if options[:event_host]
- return options[:event_host]
- else
- return options[:monitor_host]
- end
- end
-
- def safe_get(uri, event_host)
- # Handle connection timeouts
- response = nil
- begin
- connection = Faraday.new(uri)
- response = connection.get do |req|
- req.options[:timeout] = options[:read_timeout]
- req.options[:open_timeout] = options[:open_timeout]
+ def safe_get(uri, event_host)
+ # Handle connection timeouts
+ response = nil
+ begin
+ connection = Faraday.new(uri)
+ response = connection.get do |req|
+ req.options[:timeout] = options[:read_timeout]
+ req.options[:open_timeout] = options[:open_timeout]
+ end
+ report(
+ host: event_host,
+ service: 'rabbitmq monitoring',
+ state: 'ok',
+ description: 'Monitoring operational',
+ )
+ rescue StandardError => e
+ report(
+ host: event_host,
+ service: 'rabbitmq monitoring',
+ state: 'critical',
+ description: "HTTP connection error: #{e.class} - #{e.message}",
+ )
end
- report(:host => event_host,
- :service => "rabbitmq monitoring",
- :state => 'ok',
- :description => "Monitoring operational"
- )
- rescue => e
- report(:host => event_host,
- :service => "rabbitmq monitoring",
- :state => "critical",
- :description => "HTTP connection error: #{e.class} - #{e.message}"
- )
+ response
end
- response
- end
- def check_queues
- response = safe_get(queues_url, event_host)
- max_size_check_filter = if options[:ignore_max_size_queues]
- Regexp.new(options[:ignore_max_size_queues])
- else
- nil
- end
+ def check_queues
+ response = safe_get(queues_url, event_host)
+ max_size_check_filter = (Regexp.new(options[:ignore_max_size_queues]) if options[:ignore_max_size_queues])
- return if response.nil?
+ return if response.nil?
- json = JSON.parse(response.body)
+ if response.status != 200
+ report(
+ host: event_host,
+ service: 'rabbitmq.queue',
+ state: 'critical',
+ description: "HTTP connection error to /api/queues: #{response.status} - #{response.body}",
+ )
+ else
+ report(
+ host: event_host,
+ service: 'rabbitmq.queue',
+ state: 'ok',
+ description: 'HTTP connection ok',
+ )
- if response.status != 200
- report(:host => event_host,
- :service => "rabbitmq.queue",
- :state => "critical",
- :description => "HTTP connection error to /api/queues: #{response.status} - #{response.body}"
- )
- else
- report(:host => event_host,
- :service => "rabbitmq.queue",
- :state => "ok",
- :description => "HTTP connection ok"
- )
+ json = JSON.parse(response.body)
- json = JSON.parse(response.body)
+ json.each do |queue|
+ svc = "rabbitmq.queue.#{queue['vhost']}.#{queue['name']}"
+ errs = []
- json.each do |queue|
- svc = "rabbitmq.queue.#{queue['vhost']}.#{queue['name']}"
- errs = []
+ errs << 'Queue has jobs but no consumers' if !queue['messages_ready'].nil? && (queue['messages_ready']).positive? && (queue['consumers']).zero?
- if queue['messages_ready']!=nil and queue['messages_ready'] > 0 and queue['consumers'] == 0
- errs << "Queue has jobs but no consumers"
- end
+ errs << "Queue has #{queue['messages_ready']} jobs" if (max_size_check_filter.nil? || queue['name'] !~ (max_size_check_filter)) && !queue['messages_ready'].nil? && (queue['messages_ready'] > options[:max_queue_size])
- if (max_size_check_filter.nil? or queue['name'] !~ max_size_check_filter) and queue['messages_ready']!=nil and queue['messages_ready'] > options[:max_queue_size]
- errs << "Queue has #{queue['messages_ready']} jobs"
+ if errs.empty?
+ report(
+ host: event_host,
+ service: svc,
+ state: 'ok',
+ description: 'Queue is looking good',
+ )
+ else
+ report(
+ host: event_host,
+ service: svc,
+ state: 'critical',
+ description: errs.join('; '),
+ )
+ end
+
+ stats = (queue['message_stats'] || {}).merge(
+ 'messages' => queue['messages'],
+ 'messages_details' => queue['messages_details'],
+ 'messages_ready' => queue['messages_ready'],
+ 'messages_ready_details' => queue['messages_ready_details'],
+ 'messages_unacknowledged' => queue['messages_unacknowledged'],
+ 'messages_unacknowledged_details' => queue['messages_unacknowledged_details'],
+ 'consumers' => queue['consumers'],
+ 'memory' => queue['memory'],
+ )
+
+ stats.each_pair do |k, v|
+ service = "#{svc}.#{k}"
+ metric = if k =~ (/details$/) && !v.nil?
+ v['rate']
+ else
+ v
+ end
+
+ # TODO: Set state via thresholds which can be configured
+
+ report(
+ host: event_host,
+ service: service,
+ metric: metric,
+ description: 'RabbitMQ monitor',
+ )
+ end
+ end
end
+ end
- if errs.empty?
- report(:host => event_host,
- :service => svc,
- :state => "ok",
- :description => "Queue is looking good"
+ # Report cluster-wide figures from the management API's overview resource.
+ #
+ # Returns early when safe_get yields nil (safe_get has already reported the
+ # connection failure). A non-200 status is reported as critical under the
+ # 'rabbitmq' service. On 200, an ok event is sent and every key of the
+ # message_stats, queue_totals and object_totals sections is reported as a
+ # metric named "rabbitmq.<section>.<key>".
+ def check_overview
+ uri = URI(overview_url)
+ response = safe_get(uri, event_host)
+
+ return if response.nil?
+
+ if response.status != 200
+ report(
+ host: event_host,
+ service: 'rabbitmq',
+ state: 'critical',
+ description: "HTTP connection error: #{response.status} - #{response.body}",
)
else
- report(:host => event_host,
- :service => svc,
- :state => "critical",
- :description => errs.join("; ")
+ report(
+ host: event_host,
+ service: 'rabbitmq monitoring',
+ state: 'ok',
+ description: 'HTTP connection ok',
)
- end
- stats = (queue['message_stats'] || {}).merge(
- 'messages' => queue['messages'],
- 'messages_details' => queue['messages_details'],
- 'messages_ready' => queue['messages_ready'],
- 'messages_ready_details' => queue['messages_ready_details'],
- 'messages_unacknowledged' => queue['messages_unacknowledged'],
- 'messages_unacknowledged_details' => queue['messages_unacknowledged_details'],
- 'consumers' => queue['consumers'],
- 'memory' => queue['memory'],
- )
+ # Parse only after the status check, as check_queues does: an error
+ # response is not guaranteed to carry a JSON body, and a parse failure
+ # here would pre-empt the critical report above.
+ json = JSON.parse(response.body)
+
+ %w[message_stats queue_totals object_totals].each do |stat|
+ # Brand new servers can have blank message stats; skip a section that
+ # is missing or empty instead of calling empty? on nil.
+ section = json[stat]
+ next if section.nil? || section.empty?
- stats.each_pair do |k,v|
- service = "#{svc}.#{k}"
- if k =~ /details$/ and v!=nil
- metric = v['rate']
- else
- metric = v
- end
+ section.each_pair do |k, v|
+ service = "rabbitmq.#{stat}.#{k}"
+ # "*_details" entries are hashes; their 'rate' field is the metric.
+ metric = if k =~ /details$/
+ v['rate']
+ else
+ v
+ end
- # TODO: Set state via thresholds which can be configured
+ # TODO: Set state via thresholds which can be configured
- report(:host => event_host,
- :service => service,
- :metric => metric,
- :description => "RabbitMQ monitor"
- )
+ report(
+ host: event_host,
+ service: service,
+ metric: metric,
+ description: 'RabbitMQ monitor',
+ )
+ end
+ end
end
end
- end
- end
- def check_overview
- uri = URI(overview_url)
- response = safe_get(uri, event_host)
+ def check_node
+ opts[:node].each do |n|
+ uri = URI(node_url(n))
+ response = safe_get(uri, event_host)
- return if response.nil?
+ break if response.nil?
- json = JSON.parse(response.body)
+ if response.status != 200
+ if response.status == 404
+ report(
+ host: event_host,
+ service: "rabbitmq.node.#{n}",
+ state: 'critical',
+ description: 'Node was not found in the cluster',
+ )
+ else
+ report(
+ host: event_host,
+ service: "rabbitmq.node.#{n}",
+ state: 'critical',
+ description: "HTTP error: #{response.status} - #{response.body}",
+ )
+ end
+ break
+ end
- if response.status != 200
- report(:host => event_host,
- :service => "rabbitmq",
- :state => "critical",
- :description => "HTTP connection error: #{response.status} - #{response.body}"
- )
- else
- report(:host => event_host,
- :service => "rabbitmq monitoring",
- :state => "ok",
- :description => "HTTP connection ok"
- )
+ json = JSON.parse(response.body)
- %w( message_stats queue_totals object_totals ).each do |stat|
- # NOTE / BUG ?
- # Brand new servers can have blank message stats. Is this ok?
- # I can't decide.
- next if json[stat].empty?
- json[stat].each_pair do |k,v|
- service = "rabbitmq.#{stat}.#{k}"
- if k =~ /details$/
- metric = v['rate']
- else
- metric = v
+ if json['mem_alarm']
+ report(
+ host: event_host,
+ service: "rabbitmq.node.#{n}",
+ state: 'critical',
+ description: 'Memory alarm has triggered; job submission throttled',
+ )
+ break
end
- # TODO: Set state via thresholds which can be configured
+ if json['disk_free_alarm']
+ report(
+ host: event_host,
+ service: "rabbitmq.node.#{n}",
+ state: 'critical',
+ description: 'Disk free alarm has triggered; job submission throttled',
+ )
+ break
+ end
- report(:host => event_host,
- :service => service,
- :metric => metric,
- :description => "RabbitMQ monitor"
- )
- end
- end
- end
- end
-
- def check_node
- opts[:node].each do |n|
- uri = URI(node_url(n))
- response = safe_get(uri, event_host)
-
- return if response.nil?
-
- if response.status != 200
- if response.status == 404
- report(:host => event_host,
- :service => "rabbitmq.node.#{n}",
- :state => "critical",
- :description => "Node was not found in the cluster"
+ report(
+ host: event_host,
+ service: "rabbitmq.node.#{n}",
+ state: 'ok',
+ description: 'Node looks OK to me',
)
- else
- report(:host => event_host,
- :service => "rabbitmq.node.#{n}",
- :state => "critical",
- :description => "HTTP error: #{response.status} - #{response.body}"
- )
end
- return
end
- json = JSON.parse(response.body)
-
- if json['mem_alarm']
- report(:host => event_host,
- :service => "rabbitmq.node.#{n}",
- :state => "critical",
- :description => "Memory alarm has triggered; job submission throttled"
- )
- return
+ # Run the checks in order: overview, then the nodes listed in opts[:node]
+ # (skipped when that option is unset), then the queues.
+ def tick
+ check_overview
+ opts[:node] && check_node
+ check_queues
end
-
- if json['disk_free_alarm']
- report(:host => event_host,
- :service => "rabbitmq.node.#{n}",
- :state => "critical",
- :description => "Disk free alarm has triggered; job submission throttled"
- )
- return
- end
-
- report(:host => event_host,
- :service => "rabbitmq.node.#{n}",
- :state => "ok",
- :description => "Node looks OK to me"
- )
end
end
-
- def tick
- check_overview
- check_node if opts[:node]
- check_queues
- end
end
Riemann::Tools::Rabbitmq.run
-