lib/riemann/tools/http_check.rb in riemann-tools-1.6.0 vs lib/riemann/tools/http_check.rb in riemann-tools-1.7.0

- old
+ new

@@ -24,27 +24,90 @@
  opt :connection_latency_critical, 'Lattency critical threshold', short: :none, default: 0.25
  opt :response_latency_warning, 'Lattency warning threshold', short: :none, default: 0.5
  opt :response_latency_critical, 'Lattency critical threshold', short: :none, default: 1.0
  opt :http_timeout, 'Timeout (in seconds) for HTTP requests', short: :none, default: 5.0
  opt :checks, 'A list of checks to run.', short: :none, type: :strings, default: %w[consistency connection-latency response-code response-latency]
+ opt :resolvers, 'Run this number of resolver threads', short: :none, type: :integer, default: 5
+ opt :workers, 'Run this number of worker threads', short: :none, type: :integer, default: 20
+ opt :user_agent, 'User-Agent header for HTTP requests', short: :none, default: "#{File.basename($PROGRAM_NAME)}/#{Riemann::Tools::VERSION} (+https://github.com/riemann/riemann-tools)"

+ def initialize
+   @resolve_queue = Queue.new
+   @work_queue = Queue.new
+
+   opts[:resolvers].times do
+     Thread.new do
+       loop do
+         uri = @resolve_queue.pop
+         host = uri.host
+
+         addresses = Resolv::DNS.new.getaddresses(host)
+         if addresses.empty?
+           host = host[1...-1] if host[0] == '[' && host[-1] == ']'
+           begin
+             addresses << IPAddr.new(host)
+           rescue IPAddr::InvalidAddressError
+             # Ignore
+           end
+         end
+
+         @work_queue.push([uri, addresses])
+       end
+     end
+   end
+
+   opts[:workers].times do
+     Thread.new do
+       loop do
+         uri, addresses = @work_queue.pop
+         test_uri_addresses(uri, addresses)
+       end
+     end
+   end
+
+   super
+ end
+
  def tick
+   report(
+     service: 'riemann http-check resolvers utilization',
+     metric: (opts[:resolvers].to_f - @resolve_queue.num_waiting) / opts[:resolvers],
+     state: @resolve_queue.num_waiting.positive? ? 'ok' : 'critical',
+     tags: %w[riemann],
+   )
+   report(
+     service: 'riemann http-check resolvers saturation',
+     metric: @resolve_queue.length,
+     state: @resolve_queue.empty? ? 'ok' : 'critical',
+     tags: %w[riemann],
+   )
+   report(
+     service: 'riemann http-check workers utilization',
+     metric: (opts[:workers].to_f - @work_queue.num_waiting) / opts[:workers],
+     state: @work_queue.num_waiting.positive? ? 'ok' : 'critical',
+     tags: %w[riemann],
+   )
+   report(
+     service: 'riemann http-check workers saturation',
+     metric: @work_queue.length,
+     state: @work_queue.empty? ? 'ok' : 'critical',
+     tags: %w[riemann],
+   )
+
    opts[:uri].each do |uri|
-     test_uri(uri)
+     @resolve_queue.push(URI(uri))
    end
  end

- def test_uri(uri)
-   uri = URI(uri)
-
-   request = ::Net::HTTP::Get.new(uri)
+ def test_uri_addresses(uri, addresses)
+   request = ::Net::HTTP::Get.new(uri, { 'user-agent' => opts[:user_agent] })
    request.basic_auth(uri.user, uri.password)

    responses = []

-   with_each_address(uri.host) do |address|
-     responses << test_uri_address(uri, address, request)
+   addresses.each do |address|
+     responses << test_uri_address(uri, address.to_s, request)
    end
    responses.compact!

    return unless opts[:checks].include?('consistency')

@@ -113,22 +176,10 @@
  rescue StandardError
    # Ignore this address
    nil
  end

- def with_each_address(host, &block)
-   addresses = Resolv::DNS.new.getaddresses(host)
-   if addresses.empty?
-     host = host[1...-1] if host[0] == '[' && host[-1] == ']'
-     addresses << IPAddr.new(host)
-   end
-
-   addresses.each do |address|
-     block.call(address.to_s)
-   end
- end
-
  def report_http_endpoint_response_code(http, uri, response)
    return unless response

    report(
      {
@@ -154,20 +205,25 @@
        }.merge(endpoint_report(http, uri, "#{latency} latency")),
      )
    else
      report(
        {
-         state: 'critical',
+         state: latency_state(latency, nil),
          description: 'timeout',
        }.merge(endpoint_report(http, uri, "#{latency} latency")),
      )
    end
  end
  def latency_state(name, latency)
-   if latency > opts["#{name}_latency_critical".to_sym]
+   critical_threshold = opts["#{name}_latency_critical".to_sym]
+   warning_threshold = opts["#{name}_latency_warning".to_sym]
+
+   return if critical_threshold.zero? || warning_threshold.zero?
+
+   if latency.nil? || latency > critical_threshold
      'critical'
-   elsif latency > opts["#{name}_latency_warning".to_sym]
+   elsif latency > warning_threshold
      'warning'
    else
      'ok'
    end
  end