# A template for exporting metrics for prometheus using consul-templaterb
# This template can be configured the following way with environment variables
# Environment variables to filter services/instances
#   SERVICES_TAG_FILTER: basic tag filter for service (default: no tag filter)
#   INSTANCE_MUST_TAG: Second level of filtering (optional, default to SERVICES_TAG_FILTER)
#   INSTANCE_EXCLUDE_TAG: Exclude instances having the given tag
#   EXCLUDE_SERVICES: comma-separated services regexps to exclude (example: lbl7.*,netsvc-probe.*,consul-probed.*)
#   PROMETHEUS_EXPORTED_SERVICE_META: comma-separated list of meta to export into metrics of service
<%
  # Escape a value so it can be embedded between double quotes as a
  # Prometheus label value: backslash, double quote and line feed must be
  # written as \\, \" and \n in the text exposition format.
  # The block form of gsub is used so that backslashes in the replacement
  # are taken literally.
  def escape_meta(val)
    val.to_s.gsub(/[\\"\n]/) { |char| char == "\n" ? '\n' : "\\#{char}" }
  end

  service_metas_to_export = (ENV['PROMETHEUS_EXPORTED_SERVICE_META'] || 'OWNERS,version,os,criteo_flavor,node_group').split(',')
  service_tag_filter = ENV['SERVICES_TAG_FILTER']
  instance_must_tag = ENV['INSTANCE_MUST_TAG'] || service_tag_filter
  instance_exclude_tag = ENV['INSTANCE_EXCLUDE_TAG']

  # Services to hide
  services_blacklist_raw = (ENV['EXCLUDE_SERVICES'] || 'lbl7.*,netsvc-probe.*,consul-probed.*').split(',')
  services_blacklist = services_blacklist_raw.map { |v| Regexp.new(v) }

  # backends: one entry per (service name + exported meta values) combination,
  #           holding instance counts and cumulated weights per health state.
  # all_stats: per-service request statistics of consul-templaterb itself.
  backends = {}
  all_stats = {}
  service_count = 0

  # Number of serf members per status.
  statuses = agent_members().map { |m| m.status }.each_with_object(Hash.new(0)) do |status, counts|
    counts[status] += 1
  end
%>
# HELP consul_members_count A gauge of number of serf members with their count and their status('alive', 'leaving', 'left', 'failed')
# TYPE consul_members_count gauge
<%
  statuses.each do |status, count|
%>consul_members_count{serf="lan",status="<%= escape_meta(status) %>"} <%= count %>
<%
  end

  services(tag: service_tag_filter).each do |service_name, tags|
    # Skip excluded services and services lacking the mandatory tag.
    next if services_blacklist.any? { |r| r.match(service_name) }
    next unless instance_must_tag.nil? || tags.include?(instance_must_tag)

    service_count += 1
    srv = service(service_name)
    all_stats[service_name] = srv.endpoint.stats
    srv.each do |snode|
      # Honor INSTANCE_EXCLUDE_TAG (documented in the header above).
      # Tags may be absent on an instance, hence the fallback to an empty list.
      next if instance_exclude_tag && (snode['Service']['Tags'] || []).include?(instance_exclude_tag)

      # The grouping key is the service name followed by every exported meta
      # value, so instances with different metas are counted separately.
      key = snode['Service']['Service'].to_s
      metas = {}
      service_kind = snode['Service']['Kind']
      metas['__service_kind'] = service_kind if service_kind
      service_metas_to_export.each do |meta_name|
        meta = snode.service_or_node_meta_value(meta_name)
        next unless meta

        key += "::#{meta_name}=#{meta}"
        metas[meta_name] = meta
      end

      back = (backends[key] ||= {
        metas: metas,
        service_name: service_name,
        state: {
          'passing' => { instances: 0, weight: 0 },
          'warning' => { instances: 0, weight: 0 },
          'critical' => { instances: 0, weight: 0 }
        }
      })

      state = snode.status
      # Increment instance number.
      back[:state][state][:instances] += 1
      # Increment total weight regarding current node's state
      # (critical instances contribute no weight).
      case state
      when 'passing'
        back[:state][state][:weight] += snode['Service']['Weights']['Passing']
      when 'warning'
        back[:state][state][:weight] += snode['Service']['Weights']['Warning']
      end
    end
  end
%>
# HELP consul_datacenters_count A gauge of number of datacenters available
# TYPE consul_datacenters_count gauge
consul_datacenters_count <%= datacenters().count %>
# HELP consul_nodes_count A gauge of number of nodes in the cluster
# TYPE consul_nodes_count gauge
consul_nodes_count <%= nodes().count %>
# HELP consul_services_count A gauge of the number of services
# TYPE consul_services_count gauge
consul_services_count <%= service_count %>
<%
  %i[success errors bytes_read changes network_bytes].each do |sym|
%># HELP consul_net_info_<%= sym.to_s %> consul-templaterb global stats for <%= sym.to_s %>
# TYPE consul_net_info_<%= sym.to_s %> counter
consul_net_info_<%= sym.to_s %> <%= @net_info[sym] %>
<%
  end
%>
# HELP consul_service_count A gauge of number instances of service with their current state
# TYPE consul_service_count gauge
# HELP consul_service_weight A gauge of the cumulated weight of instances of service with their current state
# TYPE consul_service_weight gauge
# HELP consul_service_stats_requests_total A counter of requests performed by consul-templaterb, shows activity on a service
# TYPE consul_service_stats_requests_total counter
# HELP consul_service_stats_requests_bytes A counter of bytes transfered from Consul Agent to consul-templaterb
# TYPE consul_service_stats_requests_bytes counter
# HELP consul_service_stats_requests_bytes_per_sec A gauge of bytes per second transferred from Consul Agent to consul-templaterb
# TYPE consul_service_stats_requests_bytes_per_sec gauge
# HELP consul_service_changes_total A counter of changes that occured on a service
# TYPE consul_service_changes_total counter
<%
  now = Time.now.utc
  backends.each_value do |service_info|
    service_label = escape_meta(service_info[:service_name])
    # Extra labels (",name=\"value\"") appended after the state label.
    meta_string = service_info[:metas].map { |name, value| ",#{name}=\"#{escape_meta(value)}\"" }.join
    service_info[:state].each_pair do |state_name, state_count|
%>consul_service_count{service="<%= service_label %>",state="<%= state_name %>"<%= meta_string %>} <%= state_count[:instances] %>
consul_service_weight{service="<%= service_label %>",state="<%= state_name %>"<%= meta_string %>} <%= state_count[:weight] %>
<%
    end
  end

  # Global statistics
  all_stats.each_pair do |service_name, current_stats|
    service_label = escape_meta(service_name)
%>consul_service_stats_requests_total{service="<%= service_label %>",type="success"} <%= current_stats.successes %>
<%
    # The error series is only emitted once at least one error occurred.
    if current_stats.errors > 0
%>consul_service_stats_requests_total{service="<%= service_label %>",type="errors"} <%= current_stats.errors %>
<%
    end
%>consul_service_stats_requests_bytes{service="<%= service_label %>"} <%= current_stats.body_bytes %>
consul_service_stats_requests_bytes_per_sec{service="<%= service_label %>"} <%= current_stats.bytes_per_sec(now) %>
consul_service_changes_total{service="<%= service_label %>"} <%= current_stats.changes %>
<%
  end
%>