tools/riemann-docker/bin/riemann-docker in riemann-tools-1.0.0 vs tools/riemann-docker/bin/riemann-docker in riemann-tools-1.1.0
- old
+ new
@@ -1,217 +1,206 @@
#!/usr/bin/env ruby
-Process.setproctitle($0)
+# frozen_string_literal: true
+Process.setproctitle($PROGRAM_NAME)
+
# Reports current CPU, disk, load average, and memory use to riemann.
require 'riemann/tools'
-class Riemann::Tools::DockerHealth
- require 'docker'
- require 'socket'
- include Riemann::Tools
- include Docker
+module Riemann
+ module Tools
+ class DockerHealth
+ require 'docker'
+ require 'socket'
+ include Riemann::Tools
+ include Docker
- opt :docker_host, "Docker Container Host (see https://github.com/swipely/docker-api#host)", :type => String, :default => nil
- opt :cpu_warning, "CPU warning threshold (fraction of total jiffies)", :default => 0.9
- opt :cpu_critical, "CPU critical threshold (fraction of total jiffies)", :default => 0.95
- opt :disk_warning, "Disk warning threshold (fraction of space used)", :default => 0.9
- opt :disk_critical, "Disk critical threshold (fraction of space used)", :default => 0.95
- opt :memory_warning, "Memory warning threshold (fraction of RAM)", :default => 0.85
- opt :memory_critical, "Memory critical threshold (fraction of RAM)", :default => 0.95
- opt :host_hostname, "Suffix of host", :type => String, :default => nil
- opt :checks, "A list of checks to run.", :type => :strings, :default => ['cpu', 'memory', 'disk', 'basic']
+ opt :docker_host, 'Docker Container Host (see https://github.com/swipely/docker-api#host)', type: String,
+ default: nil
+ opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9
+ opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95
+ opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
+ opt :disk_critical, 'Disk critical threshold (fraction of space used)', default: 0.95
+ opt :memory_warning, 'Memory warning threshold (fraction of RAM)', default: 0.85
+ opt :memory_critical, 'Memory critical threshold (fraction of RAM)', default: 0.95
+ opt :host_hostname, 'Suffix of host', type: String, default: nil
+ opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu memory disk basic]
- def get_containers
- Docker::Container.all
- end
+ def containers
+ Docker::Container.all
+ end
- def get_container_name(container)
- container.json['Name'][1..-1]
- end
+ def get_container_name(container)
+ container.json['Name'][1..]
+ end
- def initialize
+ def initialize
+ Docker.url = opts[:docker_host] unless opts[:docker_host].nil?
- if (opts[:docker_host] != nil)
- Docker.url = opts[:docker_host]
- end
+ @hostname = opts[:host_hostname]
+ @hostname = Socket.gethostname if @hostname.nil? || !(@hostname.is_a? String) || @hostname.empty?
- @hostname = opts[:host_hostname]
- if (@hostname.nil? || !(@hostname.is_a? String) || @hostname.empty?)
- @hostname = Socket.gethostname
- end
+ @cpu_coefficient = 1000 * 1000 * 1000
- @cpu_coefficient = 1000 * 1000 * 1000
+ @limits = {
+ cpu: { critical: opts[:cpu_critical], warning: opts[:cpu_warning] },
+ disk: { critical: opts[:disk_critical], warning: opts[:disk_warning] },
+ memory: { critical: opts[:memory_critical], warning: opts[:memory_warning] },
+ }
- @limits = {
- :cpu => {:critical => opts[:cpu_critical], :warning => opts[:cpu_warning]},
- :disk => {:critical => opts[:disk_critical], :warning => opts[:disk_warning]},
- :memory => {:critical => opts[:memory_critical], :warning => opts[:memory_warning]}
- }
+ @last_cpu_reads = {}
+ @last_uptime_reads = {}
- @last_cpu_reads = Hash.new
- @last_uptime_reads = Hash.new
-
- opts[:checks].each do |check|
- case check
- when 'disk'
- @disk_enabled = true
- when 'cpu'
- @cpu_enabled = true
- when 'memory'
- @memory_enabled = true
- when 'basic'
- @basic_inspection_enabled = true
+ opts[:checks].each do |check|
+ case check
+ when 'disk'
+ @disk_enabled = true
+ when 'cpu'
+ @cpu_enabled = true
+ when 'memory'
+ @memory_enabled = true
+ when 'basic'
+ @basic_inspection_enabled = true
+ end
+ end
end
- end
- end
- def alert(container, service, state, metric, description)
+ def alert(container, service, state, metric, description)
+ opts = {
+ service: service.to_s,
+ state: state.to_s,
+ metric: metric.to_f,
+ description: description,
+ }
- opts = { :service => service.to_s,
- :state => state.to_s,
- :metric => metric.to_f,
- :description => description }
+ opts[:host] = if !container.nil?
+ "#{@hostname}-#{container}"
+ else
+ @hostname
+ end
- if (container != nil)
- opts[:host] = "#{@hostname}-#{container}"
- else
- opts[:host] = @hostname
- end
+ report(opts)
+ end
- report(opts)
- end
+ def report_pct(container, service, fraction, report = '', name = nil)
+ return unless fraction
- def report_pct(container, service, fraction, report = '', name = nil)
- if fraction
+ name = service if name.nil?
- if (name == nil)
- name = service
+ if fraction > @limits[service][:critical]
+ alert container, name, :critical, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
+ elsif fraction > @limits[service][:warning]
+ alert container, name, :warning, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
+ else
+ alert container, name, :ok, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
+ end
end
- if fraction > @limits[service][:critical]
- alert container, name, :critical, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
- elsif fraction > @limits[service][:warning]
- alert container, name, :warning, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
- else
- alert container, name, :ok, fraction, "#{sprintf("%.2f", fraction * 100)}% #{report}"
- end
- end
- end
+ def cpu(id, name, stats)
+ current = stats['precpu_stats']['cpu_usage']['total_usage'] / stats['precpu_stats']['cpu_usage']['percpu_usage'].count
+ unless current
+ alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
+ return false
+ end
- def cpu(id, name, stats)
+ current_time = Time.parse(stats['read'])
+ unless @last_cpu_reads[id].nil?
+ last = @last_cpu_reads[id]
+ used = (current - last[:v]) / (current_time - last[:t]) / @cpu_coefficient
- current = stats['precpu_stats']['cpu_usage']['total_usage'] / stats['precpu_stats']['cpu_usage']['percpu_usage'].count
+ report_pct name, :cpu, used
+ end
- unless current
- alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
- return false
- end
+ @last_cpu_reads[id] = { v: current, t: current_time }
+ end
- current_time = Time.parse(stats['read']);
- if (@last_cpu_reads[id] != nil)
- last = @last_cpu_reads[id]
- used = (current - last[:v]) / (current_time - last[:t]) / @cpu_coefficient
+ def memory(_id, name, stats)
+ memory_stats = stats['memory_stats']
+ usage = memory_stats['usage'].to_f
+ total = memory_stats['limit'].to_f
+ fraction = (usage / total)
- report_pct name, :cpu, used
- end
+ report_pct name, :memory, fraction, "#{usage} / #{total}"
+ end
- @last_cpu_reads[id] = { v: current, t: current_time }
- end
+ def disk
+ `df -P`.split(/\n/).each do |r|
+ f = r.split(/\s+/)
+ next if f[0] == 'Filesystem'
+ next unless f[0] =~ %r{/} # Needs at least one slash in the mount path
- def memory(id, name, stats)
- memory_stats = stats['memory_stats']
- usage = memory_stats['usage'].to_f
- total = memory_stats['limit'].to_f
- fraction = (usage / total)
+ # Calculate capacity
+ x = f[4].to_f / 100
+ report_pct(nil, :disk, x, "#{f[3].to_i / 1024} mb left", "disk #{f[5]}")
+ end
+ end
- report_pct name, :memory, fraction, "#{usage} / #{total}"
- end
+ def basic_inspection(id, name, inspection)
+ state = inspection['State']
+ json_state = JSON.generate(state)
- def disk
- `df -P`.split(/\n/).each do |r|
- f = r.split(/\s+/)
- next if f[0] == 'Filesystem'
- next unless f[0] =~ /\// # Needs at least one slash in the mount path
+ running = state['Running']
- # Calculate capacity
- x = f[4].to_f/100
- report_pct(nil, :disk, x, "#{f[3].to_i / 1024} mb left", "disk #{f[5]}")
- end
- end
+ alert(
+ name, 'status',
+ running ? 'ok' : 'critical',
+ running ? 1 : 0,
+ json_state,
+ )
- def basic_inspection(id, name, inspection)
+ return unless running
- state = inspection['State']
- json_state = JSON.generate(state)
+ start_time = DateTime.rfc3339(state['StartedAt']).to_time.utc.to_i
+ now = DateTime.now.to_time.utc.to_i
+ uptime = now - start_time
- running = state['Running']
+ unless @last_uptime_reads[id].nil?
+ last = @last_uptime_reads[id]
+ restarted = start_time != last
+ alert(
+ name, 'uptime',
+ restarted ? 'critical' : 'ok',
+ uptime,
+ "last 'StartedAt' measure was #{last} (#{Time.at(last).utc}), " \
+ "now it's #{start_time} (#{Time.at(start_time).utc})",
+ )
+ end
- alert(name, "status",
- running ? "ok" : "critical",
- running ? 1 : 0,
- json_state)
-
- if (running)
- start_time = DateTime.rfc3339(state['StartedAt']).to_time.utc.to_i
- now = DateTime.now.to_time.utc.to_i
- uptime = now - start_time
-
- if (@last_uptime_reads[id] != nil)
- last = @last_uptime_reads[id]
- restarted = start_time != last
- alert(name, "uptime",
- restarted ? "critical" : "ok",
- uptime,
- "last 'StartedAt' measure was #{last} (#{Time.at(last).utc.to_s}), " +
- "now it's #{start_time} (#{Time.at(start_time).utc.to_s})")
+ @last_uptime_reads[id] = start_time
end
- @last_uptime_reads[id] = start_time
- end
- end
+ def tick
+ # Disk is the same in every container
+ disk if @disk_enabled
- def tick
+ # Get CPU, Memory and Load of each container
+ threads = []
- # Disk is the same in every container
- if @disk_enabled
- disk()
- end
+ containers.each do |ctr|
+ threads << Thread.new(ctr) do |container|
+ id = container.id
+ name = get_container_name(container)
- # Get CPU, Memory and Load of each container
- containers = get_containers()
- threads = []
+ stats = Docker::Util.parse_json(container.connection.get("/containers/#{id}/stats", { stream: false }))
- containers.each do |ctr|
- threads << Thread.new(ctr) do |container|
-
- id = container.id
- name = get_container_name(container)
-
- stats = Docker::Util.parse_json(container.connection.get("/containers/#{id}/stats", {stream:false}))
-
- if @basic_inspection_enabled
- inspection = Docker::Util.parse_json(container.connection.get("/containers/#{id}/json"))
- basic_inspection(id, name, inspection)
+ if @basic_inspection_enabled
+ inspection = Docker::Util.parse_json(container.connection.get("/containers/#{id}/json"))
+ basic_inspection(id, name, inspection)
+ end
+ cpu(id, name, stats) if @cpu_enabled
+ memory(id, name, stats) if @memory_enabled
end
- if @cpu_enabled
- cpu(id, name, stats)
end
- if @memory_enabled
- memory(id, name, stats)
+
+ threads.each do |thread|
+ thread.join
+ rescue StandardError => e
+ warn "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
end
end
end
-
- threads.each do |thread|
- begin
- thread.join
- rescue => e
- $stderr.puts "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
- end
- end
end
end
Riemann::Tools::DockerHealth.run
-