tools/riemann-docker/bin/riemann-docker in riemann-tools-1.1.1 vs tools/riemann-docker/bin/riemann-docker in riemann-tools-1.2.0
- old
+ new
@@ -1,206 +1,8 @@
#!/usr/bin/env ruby
# frozen_string_literal: true
Process.setproctitle($PROGRAM_NAME)
-# Reports current CPU, disk, load average, and memory use to riemann.
+require 'riemann/tools/docker'
-require 'riemann/tools'
-
-module Riemann
- module Tools
- class DockerHealth
- require 'docker'
- require 'socket'
- include Riemann::Tools
- include Docker
-
- opt :docker_host, 'Docker Container Host (see https://github.com/swipely/docker-api#host)', type: String,
- default: nil
- opt :cpu_warning, 'CPU warning threshold (fraction of total jiffies)', default: 0.9
- opt :cpu_critical, 'CPU critical threshold (fraction of total jiffies)', default: 0.95
- opt :disk_warning, 'Disk warning threshold (fraction of space used)', default: 0.9
- opt :disk_critical, 'Disk critical threshold (fraction of space used)', default: 0.95
- opt :memory_warning, 'Memory warning threshold (fraction of RAM)', default: 0.85
- opt :memory_critical, 'Memory critical threshold (fraction of RAM)', default: 0.95
- opt :host_hostname, 'Suffix of host', type: String, default: nil
- opt :checks, 'A list of checks to run.', type: :strings, default: %w[cpu memory disk basic]
-
- def containers
- Docker::Container.all
- end
-
- def get_container_name(container)
- container.json['Name'][1..]
- end
-
- def initialize
- Docker.url = opts[:docker_host] unless opts[:docker_host].nil?
-
- @hostname = opts[:host_hostname]
- @hostname = Socket.gethostname if @hostname.nil? || !(@hostname.is_a? String) || @hostname.empty?
-
- @cpu_coefficient = 1000 * 1000 * 1000
-
- @limits = {
- cpu: { critical: opts[:cpu_critical], warning: opts[:cpu_warning] },
- disk: { critical: opts[:disk_critical], warning: opts[:disk_warning] },
- memory: { critical: opts[:memory_critical], warning: opts[:memory_warning] },
- }
-
- @last_cpu_reads = {}
- @last_uptime_reads = {}
-
- opts[:checks].each do |check|
- case check
- when 'disk'
- @disk_enabled = true
- when 'cpu'
- @cpu_enabled = true
- when 'memory'
- @memory_enabled = true
- when 'basic'
- @basic_inspection_enabled = true
- end
- end
- end
-
- def alert(container, service, state, metric, description)
- opts = {
- service: service.to_s,
- state: state.to_s,
- metric: metric.to_f,
- description: description,
- }
-
- opts[:host] = if !container.nil?
- "#{@hostname}-#{container}"
- else
- @hostname
- end
-
- report(opts)
- end
-
- def report_pct(container, service, fraction, report = '', name = nil)
- return unless fraction
-
- name = service if name.nil?
-
- if fraction > @limits[service][:critical]
- alert container, name, :critical, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
- elsif fraction > @limits[service][:warning]
- alert container, name, :warning, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
- else
- alert container, name, :ok, fraction, "#{format('%.2f', fraction * 100)}% #{report}"
- end
- end
-
- def cpu(id, name, stats)
- current = stats['precpu_stats']['cpu_usage']['total_usage'] / stats['precpu_stats']['cpu_usage']['percpu_usage'].count
-
- unless current
- alert name, :cpu, :unknown, nil, 'no total usage found in docker remote api stats'
- return false
- end
-
- current_time = Time.parse(stats['read'])
- unless @last_cpu_reads[id].nil?
- last = @last_cpu_reads[id]
- used = (current - last[:v]) / (current_time - last[:t]) / @cpu_coefficient
-
- report_pct name, :cpu, used
- end
-
- @last_cpu_reads[id] = { v: current, t: current_time }
- end
-
- def memory(_id, name, stats)
- memory_stats = stats['memory_stats']
- usage = memory_stats['usage'].to_f
- total = memory_stats['limit'].to_f
- fraction = (usage / total)
-
- report_pct name, :memory, fraction, "#{usage} / #{total}"
- end
-
- def disk
- `df -P`.split(/\n/).each do |r|
- f = r.split(/\s+/)
- next if f[0] == 'Filesystem'
- next unless f[0] =~ %r{/} # Needs at least one slash in the mount path
-
- # Calculate capacity
- x = f[4].to_f / 100
- report_pct(nil, :disk, x, "#{f[3].to_i / 1024} mb left", "disk #{f[5]}")
- end
- end
-
- def basic_inspection(id, name, inspection)
- state = inspection['State']
- json_state = JSON.generate(state)
-
- running = state['Running']
-
- alert(
- name, 'status',
- running ? 'ok' : 'critical',
- running ? 1 : 0,
- json_state,
- )
-
- return unless running
-
- start_time = DateTime.rfc3339(state['StartedAt']).to_time.utc.to_i
- now = DateTime.now.to_time.utc.to_i
- uptime = now - start_time
-
- unless @last_uptime_reads[id].nil?
- last = @last_uptime_reads[id]
- restarted = start_time != last
- alert(
- name, 'uptime',
- restarted ? 'critical' : 'ok',
- uptime,
- "last 'StartedAt' measure was #{last} (#{Time.at(last).utc}), " \
- "now it's #{start_time} (#{Time.at(start_time).utc})",
- )
- end
-
- @last_uptime_reads[id] = start_time
- end
-
- def tick
- # Disk is the same in every container
- disk if @disk_enabled
-
- # Get CPU, Memory and Load of each container
- threads = []
-
- containers.each do |ctr|
- threads << Thread.new(ctr) do |container|
- id = container.id
- name = get_container_name(container)
-
- stats = Docker::Util.parse_json(container.connection.get("/containers/#{id}/stats", { stream: false }))
-
- if @basic_inspection_enabled
- inspection = Docker::Util.parse_json(container.connection.get("/containers/#{id}/json"))
- basic_inspection(id, name, inspection)
- end
- cpu(id, name, stats) if @cpu_enabled
- memory(id, name, stats) if @memory_enabled
- end
- end
-
- threads.each do |thread|
- thread.join
- rescue StandardError => e
- warn "#{e.class} #{e}\n#{e.backtrace.join "\n"}"
- end
- end
- end
- end
-end
-
-Riemann::Tools::DockerHealth.run
+Riemann::Tools::Docker.run