#! /usr/bin/env ruby
# frozen_string_literal: true

#
#   check-postgres-replication
#
# DESCRIPTION:
#
#   This plugin checks postgresql replication lag
#
# OUTPUT:
#   plain text
#
# PLATFORMS:
#   Linux
#
# DEPENDENCIES:
#   gem: sensu-plugin
#   gem: pg
#
# USAGE:
#   ./check-postgres-replication.rb -m master_host -s slave_host -P port -d db -u db_user -p db_pass -w warn_threshold -c crit_threshold
#
# NOTES:
#
# LICENSE:
#   Released under the same terms as Sensu (the MIT license); see LICENSE
#   for details.
#

require 'sensu-plugins-postgres/pgpass'
require 'sensu-plugins-postgres/pgutil'
require 'sensu-plugin/check/cli'
require 'pg'

# Sensu check that reads the current WAL position from a master and the
# last WAL position from a slave, computes the difference via
# PgUtil#compute_lag and reports ok / warning / critical against thresholds
# expressed in MB.
class CheckPostgresReplicationStatus < Sensu::Plugin::Check::CLI
  # Bit shifts for the unit suffixes that `SHOW wal_segment_size` may print.
  WAL_SIZE_UNIT_SHIFT = { 'kB' => 10, 'MB' => 20, 'GB' => 30, 'TB' => 40 }.freeze

  option :pgpass,
         description: 'Pgpass file',
         short: '-f FILE',
         long: '--pgpass',
         default: ENV['PGPASSFILE'] || "#{ENV['HOME']}/.pgpass"

  option :master_host,
         short: '-m',
         long: '--master-host=HOST',
         required: true,
         description: 'PostgreSQL master HOST'

  option :slave_host,
         short: '-s',
         long: '--slave-host=HOST',
         required: true,
         description: 'PostgreSQL slave HOST'

  option :port,
         short: '-P',
         long: '--port=PORT',
         description: 'PostgreSQL port'

  option :database,
         short: '-d',
         long: '--database=NAME',
         description: 'Database NAME'

  option :user,
         short: '-u',
         long: '--user=USER',
         description: 'Database USER'

  option :password,
         short: '-p',
         long: '--password=PASSWORD',
         description: 'Database PASSWORD'

  option :ssl,
         short: '-S',
         long: '--ssl',
         boolean: true,
         description: 'Require SSL'

  option :warn,
         short: '-w',
         long: '--warning=VALUE',
         description: 'Warning threshold for replication lag (in MB)',
         default: 900,
         proc: proc(&:to_i)

  option :crit,
         short: '-c',
         long: '--critical=VALUE',
         description: 'Critical threshold for replication lag (in MB)',
         default: 1800,
         proc: proc(&:to_i)

  option :timeout,
         short: '-T TIMEOUT',
         long: '--timeout=TIMEOUT',
         default: 2,
         description: 'Connection timeout (seconds)',
         proc: proc(&:to_i)

  include Pgpass
  include PgUtil

  # Entry point invoked by the Sensu plugin runner. Exits through
  # `ok` / `warning` / `critical` depending on the measured lag.
  def run
    ssl_mode = config[:ssl] ? 'require' : 'prefer'

    critical 'Master and slave cannot be the same host' if config[:master_host] == config[:slave_host]

    # Provided by the Pgpass mixin; must run before connecting.
    # NOTE(review): presumably fills missing connection settings from the
    # pgpass file / environment -- confirm against Pgpass.
    pgpass

    # The master connection is closed before the slave connection is opened.
    master, m_segbytes = with_connection(config[:master_host], ssl_mode) do |conn|
      [current_master_location(conn), wal_segment_bytes(conn)]
    end

    slave = with_connection(config[:slave_host], ssl_mode) do |conn|
      last_slave_location(conn)
    end

    # Computing lag
    lag = compute_lag(master, slave, m_segbytes)
    lag_in_mb = (lag.to_f / 1024 / 1024).abs

    message = "replication delayed by #{lag_in_mb}MB :: master:#{master} slave:#{slave} m_segbytes:#{m_segbytes}"

    if lag_in_mb >= config[:crit]
      critical message
    elsif lag_in_mb >= config[:warn]
      warning message
    else
      ok message
    end
  end

  private

  # Opens a connection to +host+ with the configured credentials, yields it,
  # and always closes it afterwards -- including when the block raises, so a
  # failing query no longer leaks the connection.
  #
  # @param host [String] server to connect to
  # @param ssl_mode [String] value passed as libpq `sslmode`
  # @return [Object] whatever the block returns
  def with_connection(host, ssl_mode)
    conn = PG.connect(host: host,
                      dbname: config[:database],
                      user: config[:user],
                      password: config[:password],
                      port: config[:port],
                      sslmode: ssl_mode,
                      connect_timeout: config[:timeout])
    yield conn
  ensure
    # conn is nil when PG.connect itself raised.
    conn&.close
  end

  # Returns the master's current WAL location as reported by the server.
  #
  # NOTE(review): the `xlog` function names are the pre-PostgreSQL-10
  # spellings, yet they are used when check_vsn_newer_than_postgres9 is
  # truthy. That helper lives in PgUtil (not visible here); confirm it
  # really means "server is 9.x" before touching this branch.
  def current_master_location(conn)
    query = if check_vsn_newer_than_postgres9(conn)
              'SELECT pg_current_xlog_location()'
            else
              'SELECT pg_current_wal_lsn()'
            end
    conn.exec(query).getvalue(0, 0)
  end

  # Returns the slave's last WAL location as reported by the server.
  # See the review note on #current_master_location about the version branch.
  def last_slave_location(conn)
    query = if check_vsn_newer_than_postgres9(conn)
              'SELECT pg_last_xlog_receive_location()'
            else
              'SELECT pg_last_wal_replay_lsn()'
            end
    conn.exec(query).getvalue(0, 0)
  end

  # Converts the `SHOW wal_segment_size` output (e.g. "16MB", "1GB") to bytes.
  # The unit suffix is honoured; an unrecognised or missing suffix falls back
  # to the historical behaviour of treating the number as megabytes.
  def wal_segment_bytes(conn)
    setting = conn.exec('SHOW wal_segment_size').getvalue(0, 0)
    unit = setting[/[A-Za-z]+/]
    setting.sub(/\D+/, '').to_i << WAL_SIZE_UNIT_SHIFT.fetch(unit, 20)
  end
end