# Runs the tests declared in a YAML configuration file in a loop and sends a
# notification whenever a test changes state (ok <-> fail) or, optionally,
# whenever its output changes.
#
# NOTE(review): +notify+, +WdLogger+ and +WdTest+ are not defined in this part
# of the source; they are assumed to be provided elsewhere in the project.
class SysWatchdog
  DEFAULT_CONF_FILE = '/etc/sys_watchdog.yml'
  DEFAULT_LOG_FILE  = '/var/log/sys_watchdog.log'

  # @param conf_file [String, nil] path of the YAML configuration file;
  #   DEFAULT_CONF_FILE is used when nil
  # @param log_file [String, nil] path handed to WdLogger;
  #   DEFAULT_LOG_FILE is used when nil
  # @raise [RuntimeError] when the configuration file fails the checks in
  #   #check_conf_file
  def initialize(conf_file: nil, log_file: nil)
    log_file  ||= DEFAULT_LOG_FILE
    conf_file ||= DEFAULT_CONF_FILE

    @logger = WdLogger.new log_file
    parse_conf conf_file
    setup
  end

  # Runs every configured test, then sleeps and starts over.
  #
  # @param once [Boolean] return after a single pass instead of looping
  # @param interval [Numeric] seconds to sleep between two passes
  # @return [nil]
  def run(once: false, interval: 60)
    loop do
      @tests.each { |test| run_test test }
      return if once

      sleep interval
    end
  end

  private

  # Configures the Slack and Mail libraries from the +config+ section of the
  # configuration file. Each one is configured only when its setting
  # (+slack_token+ / +smtp_server+) is present.
  def setup
    if @conf.slack_token
      Slack.configure do |config|
        config.token = @conf.slack_token
      end
    end

    return unless @conf.smtp_server

    # Mail.defaults evaluates its block with instance_eval on the mail gem's
    # own configuration object, so this object's instance variables are not
    # visible inside the block. Copy the values into locals first.
    smtp_server = @conf.smtp_server
    smtp_domain = @conf.smtp_domain

    Mail.defaults do
      # NOTE(review): openssl_verify_mode 'none' disables TLS certificate
      # verification for the SMTP connection. Kept as-is; confirm it is
      # intentional.
      delivery_method :smtp,
                      address: smtp_server,
                      port: 587,
                      domain: smtp_domain,
                      enable_starttls_auto: true,
                      openssl_verify_mode: 'none'
    end
  end

  # Loads the configuration file into @conf (the +config+ section) and @tests
  # (one WdTest per entry of the +tests+ section).
  #
  # @param conf_file [String] path of the YAML configuration file
  # @raise [RuntimeError] when the file fails #check_conf_file
  def parse_conf(conf_file)
    check_conf_file conf_file

    conf = YAML.load_file conf_file
    # NOTE(review): deep_symbolize_keys! is not a core Hash method; it is
    # presumably supplied by ActiveSupport, loaded elsewhere.
    conf.deep_symbolize_keys!

    @conf  = OpenStruct.new conf[:config]
    @tests = conf[:tests].map { |name, spec| WdTest.new(name, spec, @logger) }
  end

  # Refuses a configuration file that is unreadable, whose permission bits are
  # not exactly 0600, or that is owned neither by root (uid and gid 0) nor by
  # the uid/gid of the running process.
  #
  # @param conf_file [String] path of the configuration file
  # @raise [RuntimeError] when one of the checks fails
  # @return [nil]
  def check_conf_file(conf_file)
    unless File.readable? conf_file
      raise "Conf file #{conf_file} not found or unreadable. Aborting."
    end

    conf_stat = File.stat conf_file

    # Compare permission bits only (including setuid/setgid/sticky); the
    # file-type bits of the mode are masked out.
    unless (conf_stat.mode & 0o7777) == 0o600
      raise "Conf file #{conf_file} must have mode 0600. Aborting."
    end

    owned_by_root    = conf_stat.uid.zero? && conf_stat.gid.zero?
    owned_by_process = conf_stat.uid == Process.uid && conf_stat.gid == Process.gid
    return if owned_by_root || owned_by_process

    raise "Conf file #{conf_file} must have uid/gid set to root or to current running uid/gid. Aborting."
  end

  # Runs one test and reacts to a change of its status:
  # * becomes ok   -> notifies "<name> ok"
  # * becomes fail -> runs the test's restore command once (when it has one)
  #   and re-checks; otherwise reports the failure.
  #
  # Any StandardError raised along the way is logged, not propagated, so one
  # broken test does not stop the others.
  #
  # @param test [#run, #status, #status=, #name, #restore_cmd, #restore]
  #   +run+ must return [status, exitstatus, output]
  # @param after_restore [Boolean] true for the re-check that follows a restore
  def run_test(test, after_restore: false)
    new_status, exitstatus, output = test.run

    notify_output_change test, output

    # An unchanged status needs no action, except on the re-check after a
    # restore: there the status was already recorded as failed, and a second
    # failure must still be reported.
    return if new_status == test.status && !after_restore

    test.status = new_status

    if new_status
      notify "#{test.name} ok"
    elsif test.restore_cmd && !after_restore
      test.restore
      run_test test, after_restore: true
    else
      fail test, exitstatus, output
    end
  rescue StandardError => e
    # +desc+ is not a core Exception method (presumably a project extension);
    # fall back to class and message so the handler itself cannot raise.
    @logger.error(e.respond_to?(:desc) ? e.desc : "#{e.class}: #{e.message}")
  end

  # Notifies when the output of +test+ differs from the output remembered from
  # the previous notification, then remembers the new output. Does nothing for
  # tests that do not set +notify_on_output_change+.
  def notify_output_change(test, output)
    return unless test.notify_on_output_change && test.previous_output != output

    notify "#{test.name} changed", "old: #{test.previous_output}\nnew: #{output}"
    test.previous_output = output
  end

  # Sends the failure notification for +test+. The body carries the exit
  # status and, when it is non-empty, the output.
  #
  # NOTE: this private method shadows Kernel#fail inside this class; use
  # +raise+ to raise exceptions here.
  def fail(test, exitstatus, output)
    body = "exitstatus: #{exitstatus}"
    body += "\noutput: #{output}" if output && !output.empty?

    notify "#{test.name} fail", body
  end
end