#! /usr/bin/env ruby
#
#   check-es-indices-sizes.rb
#
# DESCRIPTION:
#   This check sends a critical event when the indices matching the date pattern
#   are above a MB value.
#
# OUTPUT:
#   plain text
#
# PLATFORMS:
#   Linux
#
# DEPENDENCIES:
#   gem: sensu-plugin
#   gem: elasticsearch
#   gem: aws_es_transport
#
# USAGE:
#   ./check-es-indices-sizes.rb -h localhost -p 9200 -m 155000
#
# NOTES:
#
# LICENSE:
#   Brendan Leon Gibat
#   Released under the same terms as Sensu (the MIT license); see LICENSE
#   for details.
#

require 'date'
require 'sensu-plugin/check/cli'
require 'elasticsearch'
require 'aws_es_transport'
require 'sensu-plugins-elasticsearch'

# Checks how much disk space the Elasticsearch data nodes use and, when the
# configured budget is exceeded, either alerts with the list of date based
# indices that should be removed (oldest first) or deletes them (--delete).
class ESCheckIndicesSizes < Sensu::Plugin::Check::CLI
  include ElasticsearchCommon

  option :transport,
         long: '--transport TRANSPORT',
         description: 'Transport to use to communicate with ES. Use "AWS" for signed AWS transports.'

  option :region,
         long: '--region REGION',
         description: 'Region (necessary for AWS Transport)'

  option :host,
         description: 'Elasticsearch host',
         short: '-h HOST',
         long: '--host HOST',
         default: 'localhost'

  option :port,
         description: 'Elasticsearch port',
         short: '-p PORT',
         long: '--port PORT',
         proc: proc(&:to_i),
         default: 9200

  option :scheme,
         description: 'Elasticsearch connection scheme, defaults to https for authenticated connections',
         short: '-s SCHEME',
         long: '--scheme SCHEME'

  option :password,
         description: 'Elasticsearch connection password',
         short: '-P PASSWORD',
         long: '--password PASSWORD'

  option :user,
         description: 'Elasticsearch connection user',
         short: '-u USER',
         long: '--user USER'

  option :timeout,
         description: 'Elasticsearch query timeout in seconds',
         short: '-t TIMEOUT',
         long: '--timeout TIMEOUT',
         proc: proc(&:to_i),
         default: 30

  option :used_percent,
         description: 'Percentage of bytes to use for indices matching pattern.',
         short: '-a USED_PERCENTAGE',
         long: '--used-percentage USED_PERCENTAGE',
         proc: proc(&:to_i),
         default: 80

  option :maximum_megabytes,
         description: 'Maximum number megabytes for date based indices to use.',
         short: '-m MAXIMUM_MEGABYTES',
         long: '--maximum-megabytes MAXIMUM_MEGABYTES',
         proc: proc(&:to_i),
         default: 0

  # The default must expose the four named groups (pattern, year, month, day)
  # because build_indices_with_sizes reads them by name.
  option :pattern_regex,
         description: 'Regular expression to use for matching date based indices. Four named groups are matched, pattern, year, month, day.',
         short: '-x PATTERN_REGEX',
         long: '--pattern-regex PATTERN_REGEX',
         default: '^(?<pattern>.*)-(?<year>\d\d\d\d)\.(?<month>\d\d?)\.(?<day>\d\d?)$'

  option :delete,
         description: 'Instead of alerting deletes the indicies',
         short: '-d',
         long: '--delete',
         boolean: true,
         default: false

  # Picks the indices to remove, oldest day first, until at least
  # total_bytes_to_delete bytes are accounted for.
  #
  # @param starting_date [Date, DateTime, nil] oldest day to consider; nil
  #   (no dated indices at all) yields an empty result
  # @param total_bytes_to_delete [Integer] number of bytes that must be freed
  # @param indices_with_sizes [Hash] base pattern => array of
  #   { size:, date:, index: } hashes as built by build_indices_with_sizes
  # @return [Array<String>] names of the indices to delete
  def get_indices_to_delete(starting_date, total_bytes_to_delete, indices_with_sizes)
    return [] if starting_date.nil?

    # We don't delete the current day, as it is most likely being used.
    # Index dates carry no zone information, so "today" is evaluated in UTC.
    today = Time.now.utc.to_date

    eligible = indices_with_sizes.values.flatten.select do |index|
      index[:date] >= starting_date && index[:date] < today
    end

    # sort_by is not stable; the position tie-breaker keeps indices of the
    # same day in their original (pattern, then insertion) order.
    oldest_first = eligible.sort_by.with_index { |index, position| [index[:date], position] }

    total_bytes_deleted = 0
    indices_to_delete = []
    oldest_first.each do |index|
      break if total_bytes_deleted >= total_bytes_to_delete

      indices_to_delete.push(index[:index])
      total_bytes_deleted += index[:size]
    end
    indices_to_delete
  end

  # Queries the store stats of every index and groups the indices whose name
  # matches config[:pattern_regex] by their base pattern.
  #
  # @return [Hash] base pattern => array of { size:, date:, index: } hashes,
  #   where size is the store size in bytes and date is a Date
  def build_indices_with_sizes
    indices_fs_stats = client.indices.stats store: true
    pattern_regex = Regexp.new(config[:pattern_regex])

    indices_fs_stats['indices'].keys.each_with_object({}) do |key, hash|
      matching_index = pattern_regex.match(key)
      next if matching_index.nil?

      base_pattern = matching_index[:pattern]
      next if base_pattern.nil?

      year = matching_index[:year].to_i
      month = matching_index[:month].to_i
      day = matching_index[:day].to_i
      # The regex only constrains digit counts (e.g. month 13 matches), so
      # skip names that do not form a real calendar date instead of raising.
      next unless Date.valid_date?(year, month, day)

      hash[base_pattern] ||= []
      hash[base_pattern].push(
        size: indices_fs_stats['indices'][key]['total']['store']['size_in_bytes'].to_i,
        date: Date.new(year, month, day),
        index: key
      )
    end
  end

  # Entry point of the check: compares the space used on nodes holding index
  # data against the configured budget and alerts on, or deletes, the oldest
  # date based indices when the budget is exceeded.
  #
  # NOTE(review): ok/critical are the sensu-plugin exit helpers and are
  # expected to terminate the process; confirm against sensu-plugin.
  def run
    node_fs_stats = client.nodes.stats metric: 'fs,indices'
    nodes_being_used = node_fs_stats['nodes'].values.select { |node| node['indices']['store']['size_in_bytes'] > 0 }

    # Seed the sums with 0 so an empty node list yields 0 instead of nil.
    data_paths = nodes_being_used.flat_map { |node| node['fs']['data'] }
    used_in_bytes = data_paths.inject(0) { |sum, data| sum + data['total_in_bytes'] - data['available_in_bytes'] }
    total_in_bytes = data_paths.inject(0) { |sum, data| sum + data['total_in_bytes'] }

    if config[:maximum_megabytes] > 0
      target_bytes_used = config[:maximum_megabytes] * 1_000_000
    else
      if config[:used_percent] > 100 || config[:used_percent] < 0
        critical 'You can not make used-percentages greater than 100 or less than 0.'
      end
      target_bytes_used = (total_in_bytes.to_f * (config[:used_percent].to_f / 100.0)).to_i
    end

    total_bytes_to_delete = used_in_bytes - target_bytes_used
    if total_bytes_to_delete <= 0
      ok "Used space in bytes: #{used_in_bytes}, Total in bytes: #{total_in_bytes}"
    end

    indices_with_sizes = build_indices_with_sizes
    oldest = indices_with_sizes.values.flatten.map { |index| index[:date] }.min
    indices_to_delete = get_indices_to_delete(oldest, total_bytes_to_delete, indices_with_sizes)

    if !config[:delete]
      critical "Not enough space, #{total_bytes_to_delete} bytes need to be deleted. Used space in bytes: " \
               "#{used_in_bytes}, Total in bytes: #{total_in_bytes}. Indices to delete: " \
               "#{indices_to_delete.sort.map { |i| "INDEX[#{i}]" }.join(', ')}"
    elsif indices_to_delete.empty?
      # Never send an empty index list to the delete API, and do not report
      # a cleanup that did not happen.
      critical "Not enough space, #{total_bytes_to_delete} bytes need to be deleted, " \
               'but no date based indices are eligible for deletion.'
    else
      client.indices.delete index: indices_to_delete
      ok "Cleaned up space: #{total_bytes_to_delete}"
    end
  end
end