bin/check-chef-nodes.rb in sensu-plugins-chef-3.0.2 vs bin/check-chef-nodes.rb in sensu-plugins-chef-4.0.0

- old
+ new

@@ -71,37 +71,61 @@ description: 'Node to excludes', short: '-e EXCLUDE-NODES', long: '--exclude-nodes EXCLUDE-NODES', default: '^$' + option :grace_period, + description: 'The ammount of time before a node should be evaluated for failed convergence', + long: '--grace-period SECONDS', + default: (60 * 5), # default 5 minutes, which seems like a good but not great default + proc: proc(&:to_i) + option :ignore_ssl_verification, description: 'Ignore SSL certificate verification', short: '-i', - long: '--ignore-ssl' + long: '--ignore-ssl', + default: false, + boolean: true def connection @connection ||= chef_api_connection end def nodes_last_seen nodes = connection.node.all nodes.delete_if { |node| node.name =~ /#{config[:exclude_nodes]}/ } - nodes.map do |node| + + checked_nodes = [] + nodes.each do |node| node.reload - if node['automatic']['ohai_time'] - { node.name => (Time.now - Time.at(node['automatic']['ohai_time'])) > config[:critical_timespan].to_i } - else - { node.name => true } + # no uptime: node might have not finished convergence -> won't check + unless node['automatic']['uptime_seconds'] + checked_nodes << { node['name'] => false } + next end + + # won't check if node's uptime is still within grace period + unless node['automatic']['uptime_seconds'] > config[:grace_period] + checked_nodes << { node['name'] => false } + next + end + + # compute elapsed time since last convergence + checked_nodes << if node['automatic']['ohai_time'] + { node['name'] => (Time.now - Time.at(node['automatic']['ohai_time'])) > config[:critical_timespan].to_i } + else + { node['name'] => true } + end end + checked_nodes end def run if any_node_stuck? - ok 'Chef Server API is ok, all nodes reporting' - else critical "The following nodes cannot be provisioned: #{failed_nodes_names}" + else + ok 'Chef Server API is ok, all nodes reporting' end end private @@ -115,14 +139,29 @@ Celluloid.boot Ridley.new(server_url: chef_server_url, client_name: client_name, client_key: signing_key_filename, ssl: { verify: verify_ssl }) end def any_node_stuck? + stuck = [] @nodes_last_seen ||= nodes_last_seen - @nodes_last_seen.map(&:values).flatten.all? { |x| x == false } + @nodes_last_seen.flatten.each do |node| + node.each do |name, status| + stuck << name if status == true + end + end + if stuck.empty? + false + else + true + end end def failed_nodes_names - all_failed_tuples = @nodes_last_seen.select { |node_set| node_set.values.first == true } - all_failed_tuples.map(&:keys).flatten.join(', ') + failed_nodes = [] + @nodes_last_seen.flatten.each do |node| + node.each do |name, status| + failed_nodes << name if status == true + end + end + failed_nodes end end