module RequestLogAnalyzer::Tracker
class NumericValue < Base
attr_reader :categories
# Sets up the numeric value tracker. It will check whether the value and category
# options are set that are used to extract and categorize the values during
# parsing. Two lambda procedures are created for these tasks
def prepare
raise "No value field set up for numeric tracker #{self.inspect}" unless options[:value]
raise "No categorizer set up for numeric tracker #{self.inspect}" unless options[:category]
unless options[:multiple]
@categorizer = create_lambda(options[:category])
@valueizer = create_lambda(options[:value])
end
@number_of_buckets = options[:number_of_buckets] || 1000
@min_bucket_value = options[:min_bucket_value] ? options[:min_bucket_value].to_f : 0.000001
@max_bucket_value = options[:max_bucket_value] ? options[:max_bucket_value].to_f : 1_000_000_000
# precalculate the bucket size
@bucket_size = (Math.log(@max_bucket_value) - Math.log(@min_bucket_value)) / @number_of_buckets.to_f
@categories = {}
end
# Get the value information from the request and store it in the respective categories.
#
# If a request can contain multiple usable values for this tracker, the :multiple option
# should be set to true. In this case, all the values and respective categories will be
# read from the request using the #every method from the fields given in the :value and
# :category option.
#
# If the request contains only one suitable value and the :multiple is not set, it will
# read the single value and category from the fields provided in the :value and :category
# option, or calculate it with any lambda procedure that is assigned to these options. The
# request will be passed to procedure as input for the calculation.
#
# @param [RequestLogAnalyzer::Request] request The request to get the information from.
def update(request)
if options[:multiple]
found_categories = request.every(options[:category])
found_values = request.every(options[:value])
raise "Capture mismatch for multiple values in a request" unless found_categories.length == found_values.length
found_categories.each_with_index do |cat, index|
update_statistics(cat, found_values[index]) if cat && found_values[index].kind_of?(Numeric)
end
else
category = @categorizer.call(request)
value = @valueizer.call(request)
update_statistics(category, value) if value.kind_of?(Numeric) && category
end
end
# Block function to build a result table using a provided sorting function.
# output The output object.
# amount The number of rows in the report table (default 10).
# === Options
# * :title The title of the table
# * :sort The key to sort on (:hits, :cumulative, :average, :min or :max)
def report_table(output, sort, options = {}, &block)
output.puts
top_categories = output.slice_results(sorted_by(sort))
output.with_style(:top_line => true) do
output.table(*statistics_header(:title => options[:title], :highlight => sort)) do |rows|
top_categories.each { |(category, _)| rows << statistics_row(category) }
end
end
end
# Display a value
def display_value(value)
return "- " if value.nil?
return "0 " if value.zero?
case [Math.log10(value.abs).floor, 0].max
when 0...4 then '%d ' % value
when 4...7 then '%dk' % (value / 1000)
when 7...10 then '%dM' % (value / 1000_000)
when 10...13 then '%dG' % (value / 1000_000_000)
when 13...16 then '%dT' % (value / 1000_000_000_000)
else '%dP' % (value / 1000_000_000_000_000)
end
end
# Generate a request report to the given output object
# By default colulative and average duration are generated.
# Any options for the report should have been set during initialize.
# output The output object
def report(output)
sortings = output.options[:sort] || [:sum, :mean]
sortings.each do |sorting|
report_table(output, sorting, :title => "#{title} - by #{sorting}")
end
if options[:total]
output.puts
output.puts "#{output.colorize(title, :white, :bold)} - total: " + output.colorize(display_value(sum_overall), :brown, :bold)
end
end
# Returns the title of this tracker for reports
def title
@title ||= begin
if options[:title]
options[:title]
else
title_builder = ""
title_builder << "#{options[:value]} " if options[:value].kind_of?(Symbol)
title_builder << (options[:category].kind_of?(Symbol) ? "per #{options[:category]}" : "per request")
title_builder
end
end
end
# Returns all the categories and the tracked duration as a hash than can be exported to YAML
def to_yaml_object
return nil if @categories.empty?
@categories.each do |cat, info|
info[:stddev] = stddev(cat)
info[:median] = median(cat) if info[:buckets]
info[:interval_95_percent] = percentile_interval(cat, 95) if info[:buckets]
end
@categories
end
# Returns the bucket index for a value
def bucket_index(value)
return 0 if value < @min_bucket_value
return @number_of_buckets - 1 if value >= @max_bucket_value
((Math.log(value) - Math.log(@min_bucket_value)) / @bucket_size).floor
end
# Returns the lower value of a bucket given its index
def bucket_lower_bound(index)
Math.exp((index * @bucket_size) + Math.log(@min_bucket_value))
end
# Returns the upper value of a bucket given its index
def bucket_upper_bound(index)
bucket_lower_bound(index + 1)
end
# Returns the average of the lower and upper bound of the bucket.
def bucket_average_value(index)
(bucket_lower_bound(index) + bucket_upper_bound(index)) / 2
end
# Returns a single value representing a bucket.
def bucket_value(index, type = nil)
case type
when :begin, :start, :lower, :lower_bound; bucket_lower_bound(index)
when :end, :finish, :upper, :upper_bound; bucket_upper_bound(index)
else bucket_average_value(index)
end
end
# Returns the range of values for a bucket.
def bucket_interval(index)
Range.new(bucket_lower_bound(index), bucket_upper_bound(index), true)
end
# Records a hit on a bucket that includes the given value.
def bucketize(category, value)
@categories[category][:buckets][bucket_index(value)] += 1
end
# Returns the upper bound value that would include x% of the hits.
def percentile_index(category, x, inclusive = false)
total_encountered = 0
@categories[category][:buckets].each_with_index do |count, index|
total_encountered += count
percentage = ((total_encountered.to_f / hits(category).to_f) * 100).floor
return index if (inclusive && percentage >= x) || (!inclusive && percentage > x)
end
end
def percentile_indices(category, start, finish)
result = [nil, nil]
total_encountered = 0
@categories[category][:buckets].each_with_index do |count, index|
total_encountered += count
percentage = ((total_encountered.to_f / hits(category).to_f) * 100).floor
if !result[0] && percentage > start
result[0] = index
elsif !result[1] && percentage >= finish
result[1] = index
return result
end
end
end
def percentile(category, x, type = nil)
bucket_value(percentile_index(category, x, type == :upper), type)
end
def median(category)
percentile(category, 50, :average)
end
# Returns a percentile interval, i.e. the lower bound and the upper bound of the values
# that represent the x%-interval for the bucketized dataset.
#
# A 90% interval means that 5% of the values would have been lower than the lower bound and
# 5% would have been higher than the upper bound, leaving 90% of the values within the bounds.
# You can also provide a Range to specify the lower bound and upper bound percentages (e.g. 5..95).
def percentile_interval(category, x)
case x
when Range
lower, upper = percentile_indices(category, x.begin, x.end)
Range.new(bucket_lower_bound(lower), bucket_upper_bound(upper))
when Numeric
percentile_interval(category, Range.new((100 - x) / 2, (100 - (100 - x) / 2)))
else
raise 'What does it mean?'
end
end
# Update the running calculation of statistics with the newly found numeric value.
# category:: The category for which to update the running statistics calculations
# number:: The numeric value to update the calculations with.
def update_statistics(category, number)
@categories[category] ||= { :hits => 0, :sum => 0, :mean => 0.0, :sum_of_squares => 0.0, :min => number, :max => number,
:buckets => Array.new(@number_of_buckets, 0) }
delta = number - @categories[category][:mean]
@categories[category][:hits] += 1
@categories[category][:mean] += (delta / @categories[category][:hits])
@categories[category][:sum_of_squares] += delta * (number - @categories[category][:mean])
@categories[category][:sum] += number
@categories[category][:min] = number if number < @categories[category][:min]
@categories[category][:max] = number if number > @categories[category][:max]
bucketize(category, number)
end
# Get the number of hits of a specific category.
# cat The category
def hits(cat)
@categories[cat][:hits]
end
# Get the total duration of a specific category.
# cat The category
def sum(cat)
@categories[cat][:sum]
end
# Get the minimal duration of a specific category.
# cat The category
def min(cat)
@categories[cat][:min]
end
# Get the maximum duration of a specific category.
# cat The category
def max(cat)
@categories[cat][:max]
end
# Get the average duration of a specific category.
# cat The category
def mean(cat)
@categories[cat][:mean]
end
# Get the standard deviation of the duration of a specific category.
# cat The category
def stddev(cat)
Math.sqrt(variance(cat))
end
# Get the variance of the duration of a specific category.
# cat The category
def variance(cat)
return 0.0 if @categories[cat][:hits] <= 1
(@categories[cat][:sum_of_squares] / (@categories[cat][:hits] - 1))
end
# Get the average duration of a all categories.
def mean_overall
sum_overall / hits_overall
end
# Get the cumlative duration of a all categories.
def sum_overall
@categories.inject(0.0) { |sum, (_, cat)| sum + cat[:sum] }
end
# Get the total hits of a all categories.
def hits_overall
@categories.inject(0) { |sum, (_, cat)| sum + cat[:hits] }
end
# Return categories sorted by a given key.
# by The key to sort on. This parameter can be omitted if a sorting block is provided instead
def sorted_by(by = nil)
if block_given?
categories.sort { |a, b| yield(b[1]) <=> yield(a[1]) }
else
categories.sort { |a, b| send(by, b[0]) <=> send(by, a[0]) }
end
end
# Returns the column header for a statistics table to report on the statistics result
def statistics_header(options)
[
{:title => options[:title], :width => :rest},
{:title => 'Hits', :align => :right, :highlight => (options[:highlight] == :hits), :min_width => 4},
{:title => 'Sum', :align => :right, :highlight => (options[:highlight] == :sum), :min_width => 6},
{:title => 'Mean', :align => :right, :highlight => (options[:highlight] == :mean), :min_width => 6},
{:title => 'StdDev', :align => :right, :highlight => (options[:highlight] == :stddev), :min_width => 6},
{:title => 'Min', :align => :right, :highlight => (options[:highlight] == :min), :min_width => 6},
{:title => 'Max', :align => :right, :highlight => (options[:highlight] == :max), :min_width => 6},
{:title => '95 %tile', :align => :right, :highlight => (options[:highlight] == :percentile_interval), :min_width => 11}
]
end
# Returns a row of statistics information for a report table, given a category
def statistics_row(cat)
[cat, hits(cat), display_value(sum(cat)), display_value(mean(cat)), display_value(stddev(cat)),
display_value(min(cat)), display_value(max(cat)),
display_value(percentile_interval(cat, 95).begin) + '-' + display_value(percentile_interval(cat, 95).end) ]
end
end
end