module RequestLogAnalyzer::Tracker
class NumericValue < Base
attr_reader :categories
# Sets up the numeric value tracker.
#
# Verifies that both the :value and :category options are present, because they
# are used to extract and categorize the values during parsing. Unless the
# :multiple option is set, a callable is created for each of them through
# create_lambda. The bucket configuration (:number_of_buckets, :min_bucket_value
# and :max_bucket_value options, each with a default) is stored afterwards and
# the category store is reset to an empty hash.
#
# Raises a RuntimeError when the :value or the :category option is missing.
def prepare
  value_source = options[:value]
  category_source = options[:category]
  fail "No value field set up for numeric tracker #{inspect}" unless value_source
  fail "No categorizer set up for numeric tracker #{inspect}" unless category_source

  unless options[:multiple]
    @categorizer = create_lambda(category_source)
    @valueizer = create_lambda(value_source)
  end

  configured_min = options[:min_bucket_value]
  configured_max = options[:max_bucket_value]
  @number_of_buckets = options[:number_of_buckets] || 1000
  @min_bucket_value = configured_min ? configured_min.to_f : 0.000001
  @max_bucket_value = configured_max ? configured_max.to_f : 1_000_000_000

  # Every bucket has the same width on a logarithmic scale; precalculate it.
  log_span = Math.log(@max_bucket_value) - Math.log(@min_bucket_value)
  @bucket_size = log_span / @number_of_buckets.to_f

  @categories = {}
end
# Reads the value information from a request and records it in the matching categories.
#
# When the :multiple option is set, a request may hold several usable values. All
# categories and all values are then read with the request's #every method, using
# the fields named by the :category and :value options. Both lists must have the
# same length; pairs with a missing category or a non-numeric value are skipped.
#
# Without :multiple, a single category and a single value are obtained by calling
# the callables prepared for the :category and :value options with the request.
# The statistics are only updated when a category is present and the value is
# a Numeric or an Array.
#
# @param [RequestLogAnalyzer::Request] request The request to get the information from.
def update(request)
  unless options[:multiple]
    category = @categorizer.call(request)
    value = @valueizer.call(request)
    return unless value.is_a?(Numeric) || value.is_a?(Array)
    return unless category

    return update_statistics(category, value)
  end

  found_categories = request.every(options[:category])
  found_values = request.every(options[:value])
  if found_categories.length != found_values.length
    fail 'Capture mismatch for multiple values in a request'
  end

  found_categories.each_with_index do |cat, index|
    value = found_values[index]
    next unless cat && value.is_a?(Numeric)

    update_statistics(cat, value)
  end
end
# Builds a result table for the categories, ordered by the given sorting key.
# output  The output object that renders the table.
# sort    The key passed to sorted_by; it is also used to highlight the matching column.
# === Options
# * <tt>:title</tt> The title of the table
#
# The categories to show are selected by output.slice_results. A block can be
# passed for interface compatibility, but it is not used.
def report_table(output, sort, options = {}, &_block)
  output.puts
  top_categories = output.slice_results(sorted_by(sort))

  output.with_style(top_line: true) do
    header = statistics_header(title: options[:title], highlight: sort)
    output.table(*header) do |rows|
      top_categories.each do |category, _stats|
        rows << statistics_row(category)
      end
    end
  end
end
# Formats a value as a short string for display in a report table.
#
# nil is shown as '- ' and zero as '0 '. Values whose absolute value is below
# 10^4 are printed as an integer followed by a space. Larger values are divided
# by a power of 1000 and get a unit suffix: k, M, G, T or P. At most four
# digits precede the suffix, until the P suffix, which is used for anything
# from 10^16 upwards.
def display_value(value)
  return '- ' if value.nil?
  return '0 ' if value.zero?

  # Number of decimal digits before the point, minus one; never negative.
  magnitude = [Math.log10(value.abs).floor, 0].max
  return '%d ' % value if magnitude < 4

  # Magnitudes 4-6 map to k, 7-9 to M, 10-12 to G, 13-15 to T, the rest to P.
  unit = [(magnitude - 4) / 3, 4].min
  suffix = %w(k M G T P)[unit]
  "%d#{suffix}" % (value / 1000**(unit + 1))
end
# Generates a report for this tracker on the given output object.
#
# One table is rendered for every sorting listed in the :sort option of the
# output; by default the tables sorted by sum and by mean are generated. When
# the tracker's own :total option is set, a line with the sum over all
# categories is printed after the tables.
#
# output  The output object
def report(output)
  (output.options[:sort] || [:sum, :mean]).each do |sorting|
    report_table(output, sorting, title: "#{title} - by #{sorting}")
  end
  return unless options[:total]

  output.puts
  label = output.colorize(title, :white, :bold)
  total = output.colorize(display_value(sum_overall), :brown, :bold)
  output.puts "#{label} - total: " + total
end
# Returns the title of this tracker for reports.
#
# The :title option is used when it is set. Otherwise a title is composed from
# the :value and :category options: each contributes its name only when it is
# a Symbol, and 'per request' is used when the category is not a Symbol.
# The result is memoized.
def title
  @title ||= options[:title] || begin
    value_part = options[:value].is_a?(Symbol) ? "#{options[:value]} " : ''
    category_part = options[:category].is_a?(Symbol) ? "per #{options[:category]}" : 'per request'
    value_part + category_part
  end
end
# Returns all the categories and their tracked statistics as a hash that can be
# exported to YAML, or nil when nothing has been tracked.
#
# Before returning, the standard deviation is added to every category. The median
# and the 95% percentile interval are added as well for categories that have
# bucket data. Note that the entries are added to the stored category hashes
# themselves.
def to_yaml_object
  return nil if @categories.empty?

  @categories.each do |cat, info|
    info[:stddev] = stddev(cat)
    next unless info[:buckets]

    info[:median] = median(cat)
    info[:interval_95_percent] = percentile_interval(cat, 95)
  end
  @categories
end
# Returns the bucket index for a value.
#
# Values below the minimum bucket value map to the first bucket and values at
# or above the maximum bucket value map to the last bucket. Everything in
# between is placed on a logarithmic scale with @bucket_size as the step.
#
# The computed index is clamped to the valid range: for a value just below the
# maximum, floating point rounding in the logarithm or the division can produce
# an index equal to the number of buckets, which would point past the last bucket.
def bucket_index(value)
  last_index = @number_of_buckets - 1
  return 0 if value < @min_bucket_value
  return last_index if value >= @max_bucket_value

  index = ((Math.log(value) - Math.log(@min_bucket_value)) / @bucket_size).floor
  [[index, 0].max, last_index].min
end
# Returns the lower value of a bucket given its index.
#
# Buckets are spaced evenly on a logarithmic scale that starts at the minimum
# bucket value, so the bound is found by stepping index times @bucket_size
# from the logarithm of that minimum and converting back with the exponential.
def bucket_lower_bound(index)
  log_of_minimum = Math.log(@min_bucket_value)
  Math.exp((index * @bucket_size) + log_of_minimum)
end
# Returns the upper value of a bucket given its index, which is defined as
# the lower bound of the bucket that follows it.
def bucket_upper_bound(index)
  following_index = index + 1
  bucket_lower_bound(following_index)
end
# Returns the midpoint of a bucket, i.e. the average of its lower and its
# upper bound.
def bucket_average_value(index)
  lower = bucket_lower_bound(index)
  upper = bucket_upper_bound(index)
  (lower + upper) / 2
end
# Returns a single value representing a bucket.
#
# The type selects which value is used: :begin, :start, :lower or :lower_bound
# give the lower bound of the bucket; :end, :finish, :upper or :upper_bound
# give the upper bound. Any other type, including the default nil, gives the
# average of both bounds.
def bucket_value(index, type = nil)
  if [:begin, :start, :lower, :lower_bound].include?(type)
    bucket_lower_bound(index)
  elsif [:end, :finish, :upper, :upper_bound].include?(type)
    bucket_upper_bound(index)
  else
    bucket_average_value(index)
  end
end
# Returns the range of values covered by a bucket. The range starts at the
# lower bound of the bucket and excludes its upper bound.
def bucket_interval(index)
  lower = bucket_lower_bound(index)
  upper = bucket_upper_bound(index)
  (lower...upper)
end
# Records a hit on the bucket of the given category that includes the value.
# Returns the updated hit count of that bucket.
def bucketize(category, value)
  buckets = @categories[category][:buckets]
  slot = bucket_index(value)
  buckets[slot] += 1
end
# Returns the index of the first bucket at which the cumulative share of hits
# passes x percent.
#
# The cumulative percentage is rounded down to a whole number before it is
# compared. With inclusive set, a percentage equal to x is sufficient;
# otherwise it has to be strictly greater than x.
#
# When no bucket satisfies the condition (for instance x = 100 without
# inclusive), the index of the last bucket that holds any hits is returned,
# so that callers always receive an index instead of the bucket list.
# The result is nil only when no bucket holds any hits.
def percentile_index(category, x, inclusive = false)
  buckets = @categories[category][:buckets]
  total_hits = hits(category).to_f
  total_encountered = 0
  last_hit_index = nil

  buckets.each_with_index do |count, index|
    total_encountered += count
    last_hit_index = index if count > 0
    percentage = ((total_encountered.to_f / total_hits) * 100).floor
    return index if inclusive ? percentage >= x : percentage > x
  end

  last_hit_index
end
# Returns a pair [lower_index, upper_index] of bucket indices for a percentile
# range: the first bucket at which the cumulative share of hits exceeds start
# percent, and the first bucket at which it reaches finish percent. The
# cumulative percentage is rounded down to a whole number before comparing.
#
# Both conditions are checked for every bucket, so that an interval that lies
# within a single bucket yields that bucket for both indices. When a boundary
# is never reached, the index of the last bucket that holds any hits is used
# for it, so that callers always receive a pair of indices.
def percentile_indices(category, start, finish)
  buckets = @categories[category][:buckets]
  total_hits = hits(category).to_f
  result = [nil, nil]
  total_encountered = 0
  last_hit_index = nil

  buckets.each_with_index do |count, index|
    total_encountered += count
    last_hit_index = index if count > 0
    percentage = ((total_encountered.to_f / total_hits) * 100).floor

    result[0] = index if result[0].nil? && percentage > start
    if percentage >= finish
      result[1] = index
      return result
    end
  end

  result.map { |index| index || last_hit_index }
end
# Returns the value that represents the x-th percentile of a category.
#
# The bucket is located with percentile_index, which is asked for an inclusive
# match only when type is :upper. The bucket is then turned into a single
# value by bucket_value, using the same type.
def percentile(category, x, type = nil)
  inclusive = (type == :upper)
  index = percentile_index(category, x, inclusive)
  bucket_value(index, type)
end
# Returns the median of a category, taken as the average value of the bucket
# that holds the 50th percentile.
def median(category)
  halfway = 50
  percentile(category, halfway, :average)
end
# Returns a percentile interval, i.e. the lower bound and the upper bound of the values
# that represent the x%-interval for the bucketized dataset, as a Range.
#
# A 90% interval means that 5% of the values would have been lower than the lower bound and
# 5% would have been higher than the upper bound, leaving 90% of the values within the bounds.
# You can also provide a Range to specify the lower bound and upper bound percentages (e.g. 5..95).
#
# A Numeric x is converted into such a Range by cutting (100 - x) / 2 off both
# ends; for Integer input this is an integer division. Anything that is neither
# a Range nor a Numeric raises a RuntimeError.
def percentile_interval(category, x)
  if x.is_a?(Range)
    lower_index, upper_index = percentile_indices(category, x.begin, x.end)
    (bucket_lower_bound(lower_index)..bucket_upper_bound(upper_index))
  elsif x.is_a?(Numeric)
    tail = (100 - x) / 2
    percentile_interval(category, (tail..(100 - tail)))
  else
    fail 'What does it mean?'
  end
end
# Updates the running statistics of a category with a newly found numeric value.
#
# The entry for the category is created on first use. Hits, sum, minimum and
# maximum are adjusted, and the mean and the sum of squared differences are
# updated incrementally, so that no individual values have to be kept. Finally
# the value is counted in its bucket.
#
# category:: The category for which to update the running statistics calculations
# number:: The numeric value to update the calculations with. When an Array is
#          given, every element is processed in turn and the list of results is returned.
def update_statistics(category, number)
  if number.is_a?(Array)
    return number.map { |n| update_statistics(category, n) }
  end

  stats = (@categories[category] ||= {
    hits: 0, sum: 0, mean: 0.0, sum_of_squares: 0.0,
    min: number, max: number,
    buckets: Array.new(@number_of_buckets, 0)
  })

  # The difference to the previous mean is needed for both incremental updates.
  delta = number - stats[:mean]
  stats[:hits] += 1
  stats[:mean] += (delta / stats[:hits])
  stats[:sum_of_squares] += delta * (number - stats[:mean])
  stats[:sum] += number
  stats[:min] = number if number < stats[:min]
  stats[:max] = number if number > stats[:max]

  bucketize(category, number)
end
# Get the number of hits recorded for a specific category.
# cat  The category
def hits(cat)
  stats = @categories[cat]
  stats[:hits]
end
# Get the sum of all values recorded for a specific category.
# cat  The category
def sum(cat)
  stats = @categories[cat]
  stats[:sum]
end
# Get the smallest value recorded for a specific category.
# cat  The category
def min(cat)
  stats = @categories[cat]
  stats[:min]
end
# Get the largest value recorded for a specific category.
# cat  The category
def max(cat)
  stats = @categories[cat]
  stats[:max]
end
# Get the mean of the values recorded for a specific category.
# cat  The category
def mean(cat)
  stats = @categories[cat]
  stats[:mean]
end
# Get the standard deviation of the values of a specific category, which is
# the square root of its variance.
# cat  The category
def stddev(cat)
  spread = variance(cat)
  Math.sqrt(spread)
end
# Get the variance of the values of a specific category.
#
# The variance is the accumulated sum of squared differences divided by the
# number of hits minus one. With at most one hit it is defined as 0.0.
# cat  The category
def variance(cat)
  stats = @categories[cat]
  sample_count = stats[:hits]
  return 0.0 if sample_count <= 1

  stats[:sum_of_squares] / (sample_count - 1)
end
# Get the mean value over all categories, i.e. the overall sum divided by the
# overall number of hits.
def mean_overall
  total = sum_overall
  count = hits_overall
  total / count
end
# Get the cumulative sum of the values of all categories, as a Float.
def sum_overall
  total = 0.0
  @categories.each_value { |stats| total += stats[:sum] }
  total
end
# Get the total number of hits of all categories.
def hits_overall
  total = 0
  @categories.each_value { |stats| total += stats[:hits] }
  total
end
# Return the categories as a list of [category, statistics] pairs, sorted in
# descending order.
# by  The name of the method that is called with the category to obtain the
#     sort value. This parameter can be omitted if a sorting block is provided
#     instead; the block receives the statistics hash of a category and
#     returns its sort value.
def sorted_by(by = nil)
  unless block_given?
    return categories.sort { |(left, _), (right, _)| send(by, right) <=> send(by, left) }
  end

  categories.sort { |(_, left), (_, right)| yield(right) <=> yield(left) }
end
# Returns the column definitions for a statistics table that reports on the
# statistics result.
#
# The first column carries the table title (options[:title]) and takes the
# remaining width. It is followed by one right-aligned column per statistic;
# the column whose key equals options[:highlight] is marked as highlighted.
def statistics_header(options)
  highlighted = options[:highlight]
  statistic_columns = [
    ['Hits', :hits, 4],
    ['Sum', :sum, 6],
    ['Mean', :mean, 6],
    ['StdDev', :stddev, 6],
    ['Min', :min, 6],
    ['Max', :max, 6],
    ['95 %tile', :percentile_interval, 11]
  ].map do |label, key, width|
    { title: label, align: :right, highlight: (highlighted == key), min_width: width }
  end

  [{ title: options[:title], width: :rest }] + statistic_columns
end
# Returns a row of statistics information for a report table, given a category.
#
# The row holds the category itself, its number of hits and the display strings
# for sum, mean, standard deviation, minimum and maximum, followed by the 95%
# percentile interval rendered as "lower-upper".
#
# The percentile interval is computed once and reused for both of its ends, as
# every computation has to walk the buckets of the category.
def statistics_row(cat)
  interval = percentile_interval(cat, 95)
  [
    cat, hits(cat), display_value(sum(cat)), display_value(mean(cat)), display_value(stddev(cat)),
    display_value(min(cat)), display_value(max(cat)),
    "#{display_value(interval.begin)}-#{display_value(interval.end)}"
  ]
end
end
end