bin/postfix-exporter in postfix-exporter-0.1.0 vs bin/postfix-exporter in postfix-exporter-1.0.0
- old
+ new
@@ -1,22 +1,18 @@
#!/usr/bin/env ruby
-require 'rack'
-require 'prometheus/middleware/exporter'
+require 'prometheus/client/rack/exporter'
require 'socket'
+require 'docker'
+require 'rack'
require 'rack/handler/webrick'
require 'logger'
prometheus = Prometheus::Client.registry
-prometheus.gauge(:postfix_exporter_start_time_seconds, "When this process started up").set({}, Time.now.to_f)
+mailq = prometheus.gauge(:postfix_queue_size, "Number of messages in the mail queue")
-oldest = prometheus.gauge(:postfix_oldest_message_timestamp_seconds, "Queue time of the oldest message")
-mailq = prometheus.gauge(:postfix_queue_size, "Number of messages in the mail queue")
-q_err = prometheus.counter(:postfix_queue_processing_error_total, "Exceptions raised whilst scanning the Postfix queue")
-up = prometheus.gauge(:postfix_up, "Whether the master process is running or not")
-
Thread.abort_on_exception = true
Thread.new do
loop do
begin
@@ -25,88 +21,23 @@
end
# deferred is special, because it's often hueg it gets sharded into
# multiple subdirectories
mailq.set({ queue: 'deferred' }, Dir["/var/spool/postfix/deferred/*/*"].size)
+
+ sleep 5
rescue StandardError => ex
$stderr.puts "Error while monitoring queue sizes: #{ex.message} (#{ex.class})"
$stderr.puts ex.backtrace.map { |l| " #{l}" }.join("\n")
- q_err.increment(class: ex.class.to_s, phase: "scan")
+ sleep 1
end
-
- begin
- master_pid = File.read("/var/spool/postfix/pid/master.pid").to_i
-
- if master_pid > 1
- Process.kill(0, master_pid)
- # If we get here, then the process exists, and
- # that'll do for our purposes
- up.set({}, 1)
- else
- up.set({}, 0)
- end
- rescue Errno::ENOENT, Errno::ESRCH, Errno::EACCES
- up.set({}, 0)
- rescue Errno::EPERM
- # Ironically, we don't need to be able to *actually*
- # signal the process; EPERM means it exists and is running
- # as someone more privileged than us, which is enough
- # for our purposes
- up.set({}, 1)
- rescue StandardError => ex
- $stderr.puts "Error while checking master process: #{ex.message} (#{ex.class})"
- $stderr.puts ex.backtrace.map { |l| " #{l}" }.join("\n")
- q_err.increment(class: ex.class.to_s, phase: "up")
- end
-
- sleep 5
-
end
end
-Thread.new do
- earliest_ctime = ->(glob) do
- # There is seemingly no way to unset or remove a gauge metric in the Ruby
- # implementation of the prom exporter. As a hack, we return the current
- # time in cases where there is nothing to sample.
- now = Time.now.to_i
-
- Dir[glob].lazy.map do |n|
- begin
- File.stat(n).ctime.to_i
- rescue Errno::ENOENT
- now
- end
- end.min || now
- end
-
- loop do
- begin
- %w{incoming active corrupt hold}.each do |q|
- oldest.set({ queue: q }, earliest_ctime["/var/spool/postfix/#{q}/*"])
- end
- oldest.set({ queue: 'deferred' }, earliest_ctime["/var/spool/postfix/deferred/*/*"])
- rescue StandardError => ex
- $stderr.puts "Error while sampling message ages: #{ex.message} (#{ex.class})"
- $stderr.puts ex.backtrace.map { |l| " #{l}" }.join("\n")
- q_err.increment(class: ex.class.to_s, phase: "stat")
- end
-
- # stat()ing all the files in a large queue could potentially be quite
- # expensive, so we sample this data less frequently.
- sleep 60
-
- end
-end
-
if ENV["SYSLOG_SOCKET"]
- delays = prometheus.summary(:postfix_delivery_delays, "Distribution of time taken to deliver (or bounce) messages")
- connects = prometheus.counter(:postfix_smtpd_connections_total, "Connections to smtpd")
- active = prometheus.gauge(:postfix_smtpd_active_connections, "Current connections to smtpd")
- incoming = prometheus.counter(:postfix_incoming_delivery_attempts_total, "Delivery attempts, labelled by dsn and status")
- messages = prometheus.counter(:postfix_log_messages_total, "Syslog messages received, labelled by how it was handled")
- log_errors = prometheus.counter(:postfix_log_processing_error_total, "Exceptions raised whilst processing log messages")
+ delays = prometheus.summary(:postfix_delivery_delays, "Distribution of time taken to deliver (or bounce) messages")
+ statuses = prometheus.counter(:postfix_deliveries, "How many messages have been delivered (or bounced)")
Thread.new do
begin
s = Socket.new(Socket::AF_UNIX, Socket::SOCK_DGRAM, 0)
s.bind(Socket.pack_sockaddr_un(ENV["SYSLOG_SOCKET"]))
@@ -116,68 +47,33 @@
end
loop do
begin
msg = s.recvmsg.first
- if msg =~ %r{postfix/.* delay=(\d+(\.\d+)?), .* dsn=(\d+\.\d+\.\d+), status=(\w+)}
+ if msg =~ %r{postfix/smtp.* delay=(\d+(\.\d+)?), .* dsn=(\d+\.\d+\.\d+), status=(\w+)}
delay = $1.to_f
dsn = $3
status = $4
if status == "bounced" or status == "sent"
- delays.observe({dsn: dsn, status: status}, delay)
+ statuses.increment(dsn: dsn, status: status)
+ delays.add({dsn: dsn, status: status}, delay)
end
-
- messages.increment(type: "delay")
- elsif msg =~ %r{postfix/smtpd\[\d+\]: connect from }
- connects.increment({})
- active.send(:synchronize) { active.set({}, active.get({}) || 0 + 1) }
- messages.increment(type: "connect")
- elsif msg =~ %r{postfix/smtpd\[\d+\]: disconnect from }
- active.send(:synchronize) do
- new = (active.get({}) || 0) - 1
- # If we start running mid-stream,
- # we might end up seeing more
- # disconnects than connections,
- # which would be confusing
- new = 0 if new < 0
- active.set({}, new)
- end
- messages.increment(type: "disconnect")
- elsif msg =~ %r{postfix/smtpd\[\d+\]: [A-F0-9]+: client=}
- incoming.increment(dsn: "2.0.0", status: "queued")
- messages.increment(type: "queued")
- elsif msg =~ %r{postfix/smtpd\[\d+\]: NOQUEUE: reject: RCPT from \S+: \d{3} (\d+\.\d+\.\d+) }
- incoming.increment(dsn: $1, status: "rejected")
- messages.increment(type: "noqueue")
- else
- messages.increment(type: "ignored")
end
rescue StandardError => ex
$stderr.puts "Error while receiving postfix logs: #{ex.message} (#{ex.class})"
$stderr.puts ex.backtrace.map { |l| " #{l}" }.join("\n")
- log_errors.increment(class: ex.class.to_s)
sleep 1
end
end
end
end
app = Rack::Builder.new
-app.use Rack::Deflater, if: ->(_, _, _, body) { body.any? && body[0].length > 512 }
-app.use Prometheus::Middleware::Exporter
+app.use Prometheus::Client::Rack::Exporter
app.run ->(env) { [404, {'Content-Type' => 'text/plain'}, ['NOPE NOPE NOPE NOPE']] }
logger = Logger.new($stderr)
logger.level = Logger::INFO
logger.formatter = proc { |s, t, p, m| "WEBrick: #{m}\n" }
-# This is the only way to get the Rack-mediated webrick to listen on both
-# INADDR_ANY and IN6ADDR_ANY on libcs that don't support getaddrinfo("*")
-# (ie musl-libc). Setting `Host: '*'` barfs on the above-mentioned buggy(?)
-# libcs, `Host: '::'` fails on newer rubies (because they use
-# setsockopt(V6ONLY) by default), and with RACK_ENV at its default of
-# "development", it only listens on localhost. And even *this* only works
-# on Rack 2, because before that the non-development default listen address
-# was "0.0.0.0"!
-ENV['RACK_ENV'] = "none"
-Rack::Handler::WEBrick.run app, Port: 9154, Logger: logger, AccessLog: []
+Rack::Handler::WEBrick.run app, Host: '::', Port: 9154, Logger: logger, AccessLog: []