module BigBench
module PostProcessor
# The environment in which the post processors are evaluated. Every method defined here is available in
# the post_process block and run! methods of the predefined post processors
module Environment
# Resets the whole post processor environment: every cache (clusters, statistics, normal distributions,
# regressions, appearings and the loaded trackings) is emptied and the scope is switched back to :all
def self.reset!
  @@clusters = {}
  @@statistics = {}
  @@normal_distribution = {}
  @@regressions = {}
  @@appearings = {}
  @@trackings = []
  @@scope = :all
end
# Set up the initial state as soon as the module body is evaluated
reset!
# Adding the sum and average methods to the default array. Both always answer with a Float-compatible
# value so that the statistics never run into integer division.
class ::Array
  # Sums up all elements and returns the result as Float (0.0 for an empty array).
  #
  # This definition replaces the core Array#sum, so it also honours the optional start value and the
  # block form of the core method instead of silently ignoring the block:
  #
  #   [1, 2].sum                  # => 3.0
  #   [1, 2].sum(10)              # => 13.0
  #   [1, 2].sum { |v| v * 2 }    # => 6.0
  #
  def sum(init = nil, &block)
    values = block ? map(&block) : self
    total = init.nil? ? values.reduce(:+) : values.reduce(init, :+)
    total.to_f
  end
  # Returns the arithmetic mean of the elements, or 0 for an empty array (0.0 / 0.0 would be NaN)
  def average
    result = sum / size.to_f
    result.nan? ? 0 : result
  end
end
# Raised by a benchmark scope block when the requested benchmark name is not among BigBench.benchmarks
class BenchmarkNotFound < StandardError
  # Remembers the name that could not be resolved
  def initialize(unexistant_benchmark)
    @unexistant_benchmark = unexistant_benchmark
  end
  # Builds the error text on demand, listing the names of all currently known benchmarks
  def message
    available = BigBench.benchmarks.map(&:name).join(', ')
    "Could not find Benchmark: '#{@unexistant_benchmark}'. Available benchmarks are: #{available}"
  end
end
# Iterates through every tracking in the output file (one JSON document per line, blank lines are skipped)
# and yields a tracking hash with symbolized top-level keys of the following form:
#
#   {
#     :elapsed    => 2.502132,
#     :start      => 1333986292.1755981,
#     :stop       => 1333986293.618884,
#     :duration   => 1443,
#     :benchmark  => "index page",
#     :url        => "http://www.google.de/",
#     :path       => "/",
#     :method     => "get",
#     :status     => 200
#   }
#
# Without a block an Enumerator over the same hashes is returned.
def each_tracking
  return enum_for(:each_tracking) unless block_given?

  # The file is only read, so open it read-only; "r+" would needlessly demand write permission
  File.open(BigBench.config.output, "r") do |file|
    file.each_line do |line|
      next if line.blank?

      yield JSON.parse(line).each_with_object({}) { |(key, value), tracking| tracking[key.to_sym] = value }
    end
  end
end
# Collects all tracking hashes into one big array. Warning, the first call might take quite long because
# the whole output file is read! The array is kept in the environment, so later calls return it directly
# as long as it is not empty.
def trackings
  each_tracking { |tracking| @@trackings.push(tracking) } if @@trackings.empty?
  @@trackings
end
# Returns the current scope the environment works in. It is :all right after reset! and is reassigned
# by scope_to_benchmark; the filtering helpers compare each tracking's :benchmark against this value.
def scope
@@scope
end
# Executes the enclosed methods in the scope of the named benchmark:
#
#   # For the "index page" benchmark
#   scope_to_benchmark "index page" do
#     cluster.durations
#     cluster.requests
#   end
#
# Raises BenchmarkNotFound if no benchmark with that name exists. The scope that was active before the
# call is restored afterwards - also when the block raises - so code following the block keeps working
# on the scope it had before. Returns the value of the block.
def scope_to_benchmark name
  raise BenchmarkNotFound.new(name) unless BigBench.benchmarks.any? { |benchmark| benchmark.name == name }

  previous_scope = @@scope
  begin
    @@scope = name
    yield
  ensure
    # Resetting to nil here would make every later `scope == :all` check fail and filter out all trackings
    @@scope = previous_scope
  end
end
# Walks over all benchmarks and runs the block once per benchmark inside that benchmark's scope:
#
#   # For all benchmarks
#   cluster.durations
#   cluster.requests
#
#   # For each benchmark
#   each_benchmark do |benchmark|
#     cluster.durations
#     cluster.requests
#   end
#
# The block receives the benchmark object itself.
def each_benchmark
  BigBench.benchmarks.each { |current| scope_to_benchmark(current.name) { yield current } }
end
# Returns a clustered overview of all trackings. By default the trackings are clustered by second, but you
# can also specify any amount of seconds to group together. A cluster then has the following methods:
#
#   # Duration was 120 seconds
#   cluster.timesteps       # => [0, 1, 2, 3, ..., 120]             (seconds)
#   cluster.durations       # => [50.3, 51.2, 40.3, 51.3, ..., 50.3] (average duration in milliseconds)
#   cluster.requests        # => [580, 569, 540, 524, ..., 524]      (requests in that second)
#   cluster.methods(:get)   # => [400, 509, 340, 424, ..., 524]      (GET requests in that second)
#   cluster.methods(:post)  # => [400, 509, 340, 424, ..., 524]      (POST requests in that second)
#   cluster.statuses(200)   # => [400, 509, 340, 424, ..., 524]      (successful requests in that second)
#   cluster.statuses(404)   # => [400, 509, 340, 424, ..., 524]      (not founds in that second)
#   cluster.paths("/")      # => [400, 509, 340, 424, ..., 524]      (requests to a path in that second)
#   cluster.paths("/home")  # => [400, 509, 340, 424, ..., 524]      (requests to "/home" in that second)
#
#   # Duration was 120 seconds = 2 minutes
#   cluster(1.minute).timesteps # => [0, 1]           (minutes)
#   cluster(1.minute).durations # => [50.3, 51.2]     (average duration in milliseconds)
#   cluster(1.minute).requests  # => [27836, 27684]   (requests in that minute)
#
# An explicit extra_scope takes precedence over the current environment scope. Clusters are cached per
# timebase (in whole seconds) and scope.
def cluster(timebase = 1.second, extra_scope = nil)
  effective_scope = extra_scope || scope
  cache_key = [timebase.to_i, effective_scope]
  @@clusters[cache_key] ||= Cluster.new(timebase, effective_scope)
end
# Returns the attribute values that appear in the trackings of the current scope.
#
#   appearing.statuses  # => [200, 404]
#   appearing.methods   # => ["get", "post"]
#   appearing.paths     # => ["/", "/basic/auth"]
#
# The result is cached per scope. The timebase argument is accepted but not used for the lookup.
def appearing(timebase = 1.second)
  current_scope = scope
  @@appearings[current_scope] ||= Appearings.new(current_scope)
end
# Returns the default statistics for a given attribute. The following statistics are available:
#
#   statistics.durations.max                  # => 78.2
#   statistics.durations.min                  # => 12.3
#
#   statistics.durations.mean                 # => 45.2
#   statistics.durations.average              # => 45.2
#
#   statistics.durations.standard_deviation   # => 11.3
#   statistics.durations.sd                   # => 11.3
#
#   statistics.durations.squared_deviation    # => 60.7
#   statistics.durations.variance             # => 60.7
#
# The result is cached per timebase (in whole seconds) and scope.
def statistics(timebase = 1.second)
  cache_key = [timebase.to_i, scope]
  cached = @@statistics[cache_key]
  return cached unless cached.nil?

  fresh = @@statistics[cache_key] = AttributeCluster.new(Statistics, :timebase => timebase, :scope => scope)
  # Durations are the only attribute that must not be clustered for the statistics: they carry real
  # float values instead of a +1 per request. So the clustered y values of the durations are swapped
  # for the raw durations of every tracking in the scope.
  fresh.instance_eval do
    @durations.instance_eval do
      in_scope = trackings.select { |tracking| tracking[:benchmark] == scope || scope == :all }
      @y = in_scope.map { |tracking| tracking[:duration] }.compact
    end
  end
  fresh
end
# Returns a gaussian distribution for the specified attribute. It automatically calculates the necessary
# mean and deviation values and adapts the x values to fit the bell curve best.
#
#   normal_distribution.durations.x # => [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ...]
#   normal_distribution.durations.y # => [0, 0, 7, 8, 9, 10, 11, 10, 9, 8, 7, 4, ...]
#
# The result is cached per timebase (in whole seconds) and scope.
def normal_distribution(timebase = 1.second)
  cache_key = [timebase.to_i, scope]
  cached = @@normal_distribution[cache_key]
  return cached unless cached.nil?

  distribution = AttributeCluster.new(NormalDistribution, :timebase => timebase, :scope => scope)
  # Durations are the only attribute that must not be clustered: they carry real float values instead of
  # a +1 per request. A NormalDistribution derives everything from the y values handed to its
  # constructor and never reads @y afterwards, so assigning @y later has no effect. The durations
  # distribution is therefore rebuilt from the raw durations of every tracking in the scope.
  current_scope = scope
  raw_durations = trackings.map { |tracking|
    tracking[:duration] if current_scope == :all || tracking[:benchmark] == current_scope
  }.compact
  # NormalDistribution computes its own x values, so the x argument is irrelevant here
  distribution.instance_variable_set(:@durations, NormalDistribution.new([], raw_durations, 0))
  @@normal_distribution[cache_key] = distribution
end
# Returns a polynomial regression for a degree and a timebase. Possible options are:
#
# [:degree]     By default the degree is 1 which results in a linear regression. There's no limit to the degree.
# [:timebase]   By default the cluster size is 1.second. Any timelimit can be added here, e.g. 1.minute
#
# The derivation is not an option of this method; it is selected on the returned attribute with
# derivation(0) (the regression itself) or derivation(1) (its first derivation).
#
#   # Return a linear regression for the durations, clustered by seconds
#   polynomial_regression.durations.y
#   polynomial_regression(:degree => 1, :timebase => 1.second).durations.y
#   polynomial_regression(:degree => 1, :timebase => 1.second).durations.derivation(0)
#
#   # Return the first derivation of the linear regression for the durations, clustered by seconds
#   polynomial_regression(:degree => 1).durations.derivation(1)
#   polynomial_regression(:degree => 1, :timebase => 1.second).durations.derivation(1)
#
#   # Return a second degree polynomial regression for the durations, clustered by seconds
#   polynomial_regression(:degree => 2).durations.derivation(0)
#   polynomial_regression(:degree => 2, :timebase => 1.second).durations.derivation(0)
#
#   # Return the first derivation of the second degree polynomial regression for the durations, clustered by seconds
#   polynomial_regression(:degree => 2).durations.derivation(1)
#   polynomial_regression(:degree => 2, :timebase => 1.second).durations.derivation(1)
#
# The result is cached per degree, timebase (in whole seconds) and scope.
def polynomial_regression(new_options = {})
  settings = { :degree => 1, :timebase => 1.second }.merge(new_options)
  degree, timebase = settings[:degree], settings[:timebase]
  cache_key = [degree, timebase.to_i, scope]
  @@regressions[cache_key] ||= AttributeCluster.new(PolynomialRegression, :timebase => timebase, :scope => scope, :degree => degree)
end
# Calculates the default statistic values for a given attribute, like the duration, requests, etc.
# All values are computed on demand from y, so they reflect y values that are exchanged after creation.
class Statistics
  include Environment
  attr_reader :x
  attr_reader :y
  # x and y are the value arrays of the attribute. The degree is accepted so that every attribute class
  # can be created with the same three arguments; it is not used here.
  def initialize x, y, degree
    @x, @y = x, y
  end
  # The maximum of the attribute
  def max
    @y.max
  end
  # The minimum of the attribute
  def min
    @y.min
  end
  # The mean or average of the attribute
  def mean
    @y.average
  end
  alias_method :average, :mean
  # The standard deviation or sd of the attribute, i.e. the square root of the variance
  # (not the mean absolute deviation)
  def standard_deviation
    Math.sqrt(squared_deviation)
  end
  alias_method :sd, :standard_deviation
  # The mean squared deviation or variance of the attribute. Returns 0.0 for an empty attribute
  # instead of NaN, in line with Array#average.
  def squared_deviation
    return 0.0 if @y.empty?
    u = mean
    @y.inject(0) { |result, element| result + (element - u)**2 }.to_f / @y.size.to_f
  end
  alias_method :variance, :squared_deviation
end
# Calculates a gaussian normal distribution with the mean and variance of the supplied y values
class NormalDistribution
  include Environment
  attr_reader :mean
  attr_reader :sd
  attr_reader :variance
  attr_reader :x
  attr_reader :formula
  # Derives mean, variance and standard deviation from y and prepares 120 x values that span
  # mean +/- 6 standard deviations. The x and degree arguments are accepted so that every attribute
  # class can be created with the same three arguments; x is replaced by the computed values.
  def initialize x, y, degree
    @mean = y.average
    # The density needs the variance (sigma squared) while the x limits need sigma itself
    @variance = y.inject(0) { |result, element| result + (element - @mean)**2 }.to_f / y.size.to_f
    @sd = Math.sqrt(@variance)
    # Setup the function that maps the gaussian normal distribution:
    # 1 / sqrt(2 * pi * sigma^2) * e^(-(x - mean)^2 / (2 * sigma^2))
    @distribution = lambda { |value| 1 / Math.sqrt(2 * Math::PI * @variance) * Math::E**(-0.5 * (value - @mean)**2 / @variance) }
    # Setup x to match the center mean
    upper_limit = @mean + (6 * @sd)
    lower_limit = @mean - (6 * @sd)
    steps = 120
    step_size = (upper_limit - lower_limit) / steps
    @x = Array.new(steps) { |step| (step * step_size) + lower_limit }
    # Store formula for printing
    @formula = "1 / sqrt(2 * pi * #{@variance}) * e**( -0.5 * (x - #{@mean})**2 / #{@variance})"
  end
  # Returns an array with the distribution values for every x, rounded to three digits, like this:
  #
  #   [0.0, 0.1, 1.2, 4.5, 10.8, 4.5, 1.2, 0.1, 0.0]
  #
  def y
    @x.map { |value| @distribution.call(value).round(3) }
  end
end
# Clusters the trackings in the specified timebase. By default everything is clustered by seconds.
class Cluster
  include Environment
  # Allows the registering of multi dimensioned attributes and 0s out values that aren't present
  #
  #   attr_multi_dimension_reader :methods
  #
  # The generated reader takes the value to look up (it is compared as a string) and returns one counter
  # per timestep. Note that a reader named :methods replaces Object#methods on the instances.
  def self.attr_multi_dimension_reader(attribute)
    define_method(attribute) do |appearance|
      series = instance_variable_get("@#{attribute}".to_sym)
      series.key?(appearance.to_s) ? series[appearance.to_s] : timesteps.dup.fill(0)
    end
  end
  attr_reader :timesteps
  attr_reader :durations
  attr_reader :requests
  attr_multi_dimension_reader :methods
  attr_multi_dimension_reader :statuses
  attr_multi_dimension_reader :paths
  # Requests per benchmark name; AttributeCluster asks the cluster for :benchmarks
  attr_multi_dimension_reader :benchmarks
  alias_method :benchmark, :benchmarks
  # Builds the cluster from all trackings that belong to the scope (:all or a benchmark name).
  # The number of timesteps is the configured benchmark duration divided by the timebase, plus one.
  def initialize(timebase = 1.second, scope = :all)
    @timesteps, @durations, @durations_array, @requests, @scope = [], [], [], [], scope
    @methods, @statuses, @paths, @benchmarks = {}, {}, {}, {}
    # Single dimension attributes
    steps = BigBench.config.duration.to_i / timebase
    (0..steps).each do |timestep|
      @timesteps[timestep] = timestep
      @durations_array[timestep] = []
      @requests[timestep] = 0
    end
    # Cluster trackings. The multi dimension series are created on first appearance instead of being
    # pre-seeded from appearing: appearing follows the environment scope, which may differ from the
    # scope of this cluster and would then miss values that show up here.
    # NOTE(review): assumes :elapsed never exceeds the configured duration by a full timebase - confirm
    trackings.each do |tracking|
      next unless scope == :all || tracking[:benchmark] == scope
      timestep = tracking[:elapsed].to_i / timebase
      @durations_array[timestep] << tracking[:duration]
      @requests[timestep] += 1
      increment_series(@methods, tracking[:method], timestep)
      increment_series(@statuses, tracking[:status], timestep)
      increment_series(@paths, tracking[:path], timestep)
      increment_series(@benchmarks, tracking[:benchmark], timestep)
    end
    # Compute mean of durations
    @timesteps.each do |timestep|
      @durations[timestep] = @durations_array[timestep].average
    end
  end
  private
  # Adds one request to the counter of the appearance at the timestep, creating a zeroed series first
  # if this appearance has not been seen yet
  def increment_series(series, appearance, timestep)
    counters = (series[appearance.to_s] ||= Array.new(@timesteps.size, 0))
    counters[timestep] += 1
  end
end
# Lists the distinct methods, statuses and paths that appear in the trackings of a scope
class Appearings
  include Environment
  attr_reader :methods, :statuses, :paths
  # Collects the values from every tracking whose benchmark equals the scope, or from all trackings
  # if the scope is :all. Each list keeps the order of first appearance.
  def initialize scope = :all
    in_scope = trackings.select { |tracking| tracking[:benchmark] == scope || scope == :all }
    @methods = in_scope.map { |tracking| tracking[:method] }.uniq
    @statuses = in_scope.map { |tracking| tracking[:status] }.uniq
    @paths = in_scope.map { |tracking| tracking[:path] }.uniq
  end
end
# This class performs the actual regression for a specified degree and timebase. As x it returns the timebase
# values for the corresponding timebase - e.g. the seconds - and as y it returns the corresponding regression
# values. Additionally it offers the derivations for the regressions with the derivation method.
class PolynomialRegression
  # An array with the seconds in the timebase
  #
  #   [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
  #
  attr_reader :x
  attr_reader :degree
  # Returns the coefficients calculated for the regression and the degree, lowest power first, like
  #
  #   [3.428734, 1.176235]
  #
  attr_reader :coefficients
  # Fits a polynomial of the given degree through the (x, y) points with a least squares regression.
  # Raises a RuntimeError if x holds one value or less.
  def initialize x, y, degree
    @x = x
    @degree = degree
    raise "Regression is not possible for a single time value, choose a smaller timebase" if @x.size <= 1
    # One matrix row per x value, holding x**0 up to x**degree
    powers = (0..degree).to_a
    design = ::Matrix[*x.map { |value| powers.map { |power| (value**power).to_f } }]
    observed = ::Matrix.column_vector(y)
    # Solve the normal equations for the coefficients
    @coefficients = ((design.transpose * design).inv * design.transpose * observed).transpose.to_a[0]
    # Index 0 evaluates the polynomial itself, index 1 its first derivation
    polynomial = lambda { |value| (0..@degree).inject(0) { |total, power| total + (@coefficients[power] * value**power) } }
    slope = lambda { |value| (1..@degree).inject(0) { |total, power| total + (power * (@coefficients[power] * value**(power - 1))) } }
    @derivations = [polynomial, slope]
    # Printable terms, in the same order as the derivations
    polynomial_terms = (0..@degree).map { |power| power.zero? ? @coefficients[power] : "#{@coefficients[power]}x^#{power}" }
    slope_terms = (1..@degree).map { |power| power == 1 ? @coefficients[power] : "#{power}*#{@coefficients[power]}x^#{power - 1}" }
    @formulas = [polynomial_terms, slope_terms]
  end
  # Returns an array with the computed y-values for the corresponding derivation. The default derivation is the
  # 0. derivation which is the original regression that is equal to the y method. Only the derivations 0 and 1
  # are prepared. The result looks like this:
  #
  #   [2.5, 2.6, 2.7, 2.8, 3.0, 3.2, 3.4, 3.8, 4.0, 4.9]
  #
  def derivation(derivation = 0)
    evaluator = @derivations[derivation]
    @x.map { |value| evaluator.call(value) }
  end
  # Returns an array with the regression values like this:
  #
  #   [2.5, 2.6, 2.7, 2.8, 3.0, 3.2, 3.4, 3.8, 4.0, 4.9]
  #
  def y
    derivation(0)
  end
  # Returns the printed formula of this polynomial, its terms joined with " + "
  def formula(derivation = 0)
    @formulas[derivation].join(" + ")
  end
end
# Wraps every attribute of a cluster (durations, requests, methods, statuses, paths, benchmarks) into an
# instance of the given class, e.g. Statistics or PolynomialRegression
class AttributeCluster
  include Environment
  # klass is created with (timesteps, attribute values, degree) for each attribute. Supported options are
  # :degree (default 0), :timebase (default 1.second) and :scope (default :all).
  def initialize klass, options = {}
    @options = { :degree => 0, :timebase => 1.second, :scope => :all }.merge(options)
    @degree = @options[:degree]
    @klass = klass
    @cluster = cluster(@options[:timebase], @options[:scope])
    [:durations, :requests].each { |name| cluster_attribute name }
    [:methods, :statuses, :paths, :benchmarks].each { |name| cluster_attribute_with_options name }
  end
  private
  # Allows the attribute reader definition for attributes without options like this:
  #
  #   cluster_attribute :durations
  #
  # The wrapped attribute is built right away and exposed through a reader of the same name.
  def cluster_attribute(name)
    wrapped = @klass.new(@cluster.timesteps, @cluster.send(name.to_sym), @degree)
    instance_variable_set("@#{name}".to_sym, wrapped)
    self.class.class_eval { attr_reader name }
  end
  # Allows the attribute reader definition for attributes with options like this:
  #
  #   cluster_attribute_with_options :statuses
  #
  # The reader takes the option (e.g. a status code), builds the wrapped attribute on first use and
  # keeps it in a hash keyed by the option as string.
  def cluster_attribute_with_options(name)
    store_name = "@#{name}".to_sym
    instance_variable_set store_name, {}
    self.class.class_eval do
      define_method(name) do |option|
        store = instance_variable_get(store_name)
        store[option.to_s] ||= @klass.new(@cluster.timesteps, @cluster.send(name.to_sym, option), @degree)
      end
    end
  end
end
end
end
end