lib/ring/sqa/alarm.rb in ring-sqa-0.0.19 vs lib/ring/sqa/alarm.rb in ring-sqa-0.0.20
- old
+ new
@@ -1,11 +1,11 @@
require_relative 'alarm/email'
require_relative 'alarm/udp2irc'
require_relative 'alarm/cfg'
+require_relative 'alarm/message'
require_relative 'mtr'
require_relative 'paste'
-require_relative 'nodes_json'
module Ring
class SQA
class Alarm
@@ -28,23 +28,23 @@
end
end
private
- def initialize
+ def initialize nodes
+ @nodes = nodes
@methods = []
@methods << Email.new if CFG.email.to?
@methods << UDP2IRC.new if CFG.irc.password?
+ @hostname = Ring::SQA::CFG.host.name
@alarm = false
- @hostname = (Socket.gethostname rescue 'anonymous')
end
def compose_message alarm_buffer
exceeding_nodes = alarm_buffer.exceeding_nodes
msg = {short: "#{@hostname}: raising alarm - #{exceeding_nodes.size} new nodes down"}
- nodes_json = NodesJSON.new
- exceeding_nodes = exceeding_nodes.map { |node| nodes_json.get node }
+ exceeding_nodes = exceeding_nodes.map { |node| @nodes.get node }
nodes_list = ''
exceeding_nodes.sort_by{ |node| node[:cc] }.each do |node|
nodes_list << "- %-35s %15s AS%-6s %2s\n" % [node[:name], node[:ip], node[:as], node[:cc]]
end
@@ -62,43 +62,10 @@
buffer_list << "%2s min ago %3s measurements failed" % [time, ary.size/2]
buffer_list << (time.to_i < 3 ? " (raised alarm)\n" : " (baseline)\n")
time -= 1
end
- msg[:long] = <<EOF
-Regarding: #{hostname}
-
-This is an automated alert from the distributed partial outage
-monitoring system "RING SQA".
-
-At #{Time.now.utc} the following measurements were analysed
-as indicating that there is a high probability your NLNOG RING node
-cannot reach the entire internet. Possible causes could be an outage
-in your upstream's or peer's network.
-
-The following #{exceeding_nodes.size} nodes previously were reachable, but became unreachable
-over the course of the last 3 minutes:
-
-#{nodes_list}
-
-As a debug starting point 3 traceroutes were launched right after
-detecting the event, they might assist in pinpointing what broke:
-
-#{mtr_list}
-
-An alarm is raised under the following conditions: every 30 seconds
-your node pings all other nodes. The amount of nodes that cannot be
-reached is stored in a circular buffer, with each element representing
-a minute of measurements. In the event that the last three minutes are
-#{Ring::SQA::CFG.analyzer.tolerance} above the median of the previous 27 measurement slots, a partial
-outage is assumed. The ring buffer's output is as following:
-
-#{buffer_list}
-
-Kind regards,
-
-NLNOG RING
-EOF
+ msg[:long] = message nodes_list, mtr_list, buffer_list
msg
end
end