require 'remon/metrics/system'

# "system" check: on every run, reports CPU usage, iowait, normalized load
# average, memory usage and uptime as events.
#
# The warning/critical options below are per-service thresholds. CPU and
# iowait thresholds are expressed in percent (the metrics are multiplied by
# 100 before being passed to service_state); load and memory thresholds are
# compared against the raw metric.
defcheck "system" do
  opts({
    cpu_warning: 85,
    cpu_critical: 95,
    iowait_warning: 30,
    iowait_critical: 70,
    load_warning: 3,
    load_critical: 8,
    memory_warning: 0.85,
    memory_critical: 0.95
  })

  # Creates the system metrics reader used by all the checks below.
  def init
    @sys = Metrics::System.new
  end

  # Collects all events for one run. cpu_and_iowait is splatted because it
  # returns nil (splats to nothing) on the first run and a two-element array
  # afterwards.
  def run
    [*cpu_and_iowait, loadavg, memory, uptime]
  end

  # Computes CPU and iowait usage between the previous sample and the current
  # one.
  #
  # Returns nil on the first call (there is no previous sample to diff
  # against), otherwise [cpu_event, iowait_event].
  def cpu_and_iowait
    old_cpu = @old_cpu
    new_cpu = @sys.cpu_stat
    unless new_cpu
      # NOTE(review): `e` is not defined in this file; presumably it is an
      # event helper provided by the check DSL -- confirm. Its result is
      # splatted by `run`, so also confirm it splats to a single event.
      return e 'cpu', :unknown, nil, "/proc/stat doesn't include a CPU line"
    end

    @old_cpu = new_cpu
    return unless old_cpu

    used, iowait = @sys.cpu_usage(old_cpu, new_cpu)
    [cpu_event(used), iowait_event(iowait)]
  end

  # Builds the "load" event from the normalized load average.
  def loadavg
    metric = @sys.loadavg_normalized
    event({
      service: "load",
      metric: metric,
      description: "1-minute load average/core is #{metric}",
      state: service_state("load", metric)
    })
  end

  # Builds the "memory" event. The description carries the used percentage
  # followed by the top 10 processes by memory as listed by `ps`.
  def memory
    metric = @sys.memory
    top_processes = `ps -eo pmem,pid,comm | sort -nrb -k1 | head -10`.chomp
    description = "#{(metric * 100).round(2)}% used\n\n#{top_processes}"
    event({
      service: "memory",
      metric: metric,
      description: description,
      state: service_state("memory", metric)
    })
  end

  # Builds the "uptime" event; the metric is the uptime in days, rounded to
  # two decimals. The state is always "ok".
  def uptime
    up_seconds = @sys.uptime
    # fdiv forces floating-point division, so an Integer number of seconds is
    # not truncated to whole days before rounding.
    metric = up_seconds.fdiv(86_400).round(2)
    @ips ||= Sysinfo.ips.join(", ") # memoized after the first call
    description = <<~HEREDOC
      ip: "#{@ips}"
      instance_type: "#{Sysinfo.instance_type}"
    HEREDOC
    event({
      service: "uptime",
      metric: metric,
      description: description,
      state: "ok"
    })
  end

  private

  # Builds the "cpu" event. `metric` is a fraction; it is scaled to percent
  # for the description and for the threshold comparison. The description
  # also lists the top 10 processes by CPU as listed by `ps`.
  def cpu_event(metric)
    top_processes = `ps -eo pcpu,pid,comm | sort -nrb -k1 | head -10`.chomp
    description = "#{(metric * 100).round(2)}% user+nice+system\n\n#{top_processes}"
    event service: "cpu",
          description: description,
          metric: metric,
          state: service_state("cpu", metric * 100)
  end

  # Builds the "iowait" event. `metric` is a fraction; it is scaled to
  # percent for the description and for the threshold comparison.
  def iowait_event(metric)
    # Rounded like cpu_event so float noise does not leak into the text.
    description = "#{(metric * 100).round(2)}% iowait"
    event service: "iowait",
          description: description,
          metric: metric,
          state: service_state("iowait", metric * 100)
  end
end