module BigBench
  module PostProcessor

    # The environment in which the post processors are evaluated. Every method defined here is available in
    # the post_process block and run! methods of the predefined post processors
    module Environment

      # Resets the whole post processor environment: all caches are emptied and the scope
      # is set back to :all
      def self.reset!
        @@clusters, @@statistics, @@normal_distribution, @@regressions, @@appearings = {}, {}, {}, {}, {}
        @@trackings = []
        @@scope = :all
      end
      reset!

      # Adding the sum and average methods to the default array.
      #
      # NOTE(review): on Rubies that ship their own Array#sum this replaces it with a variant
      # that takes no arguments and always returns a float - confirm that nothing else in the
      # process relies on the built-in signature.
      class ::Array
        # Sum of all elements as a float, 0.0 for an empty array
        def sum
          reduce(:+).to_f
        end

        # Arithmetic mean of all elements, 0 for an empty array
        def average
          result = sum / size.to_f
          # 0.0 / 0.0 is NaN for an empty array, report that as 0
          result.nan? ? 0 : result
        end
      end

      # Is raised when a benchmark scope block doesn't find the desired benchmark
      class BenchmarkNotFound < StandardError
        def initialize(unexistant_benchmark)
          @unexistant_benchmark = unexistant_benchmark
        end

        # Names the missing benchmark and lists the benchmarks that are available
        def message
          "Could not find Benchmark: '#{@unexistant_benchmark}'. Available benchmarks are: #{BigBench.benchmarks.map{ |b| b.name }.join(', ')}"
        end
      end

      # Iterates through every tracking and yields a tracking hash of the following form:
      #
      #   {
      #     :elapsed    => 2.502132,
      #     :start      => 1333986292.1755981,
      #     :stop       => 1333986293.618884,
      #     :duration   => 1443,
      #     :benchmark  => "index page",
      #     :url        => "http://www.google.de/",
      #     :path       => "/",
      #     :method     => "get",
      #     :status     => 200
      #   }
      #
      # Every non-blank line of the file at BigBench.config.output is parsed as one JSON object
      # and its keys are converted to symbols.
      def each_tracking
        # The file is only read, so it is opened read-only and needs no write permission
        File.open(BigBench.config.output, "r") do |file|
          file.each_line do |line|
            next if line.blank?
            yield JSON.parse(line).inject({}) { |memo, (key, value)| memo[key.to_sym] = value; memo }
          end
        end
      end

      # Puts all tracking hashes into a huge array. Warning, this method call might take quite long!
      # The results are cached, so you can call trackings in the future without any pain
      def trackings
        return @@trackings unless @@trackings.empty?
        each_tracking { |tracking| @@trackings << tracking }
        @@trackings
      end

      # Returns the current scope the environment works in, :all unless a benchmark scope is active
      def scope
        @@scope
      end

      # Executes the including methods in the scope of the benchmark:
      #
      #   # For the "index page" benchmark
      #   scope_to_benchmark "index page" do
      #     cluster.durations
      #     cluster.requests
      #   end
      #
      # Raises BenchmarkNotFound if no benchmark with that name exists. The scope that was active
      # before the call is restored afterwards, even if the block raises. Returns nil.
      def scope_to_benchmark(name)
        raise BenchmarkNotFound.new(name) unless BigBench.benchmarks.map{ |b| b.name }.include?(name)

        previous_scope = @@scope
        @@scope = name
        begin
          yield
        ensure
          # Restore instead of clearing: a nil scope would match no tracking at all, because the
          # scope filters only accept a benchmark name or :all
          @@scope = previous_scope
        end
        nil
      end

      # Iterates over all benchmarks and automatically executes all methods in the benchmark scope like this:
      #
      #   # For all benchmarks
      #   cluster.durations
      #   cluster.requests
      #
      #   # For each benchmark
      #   each_benchmark do |benchmark|
      #     cluster.durations
      #     cluster.requests
      #   end
      #
      def each_benchmark
        BigBench.benchmarks.each do |benchmark|
          scope_to_benchmark(benchmark.name) { yield benchmark }
        end
      end

      # Returns a clustered overview of all trackings. By default the trackings are clustered by second, but you
      # can also specify any amount of seconds to group together. A cluster then has the following methods:
      #
      #   # Duration was 120 seconds
      #   cluster.timesteps           # => [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,..., 120]                                  (seconds)
      #   cluster.durations           # => [50.3, 51.2, 40.3, 51.3, 50.3, 55.3, 52.3, 50.3, 51.3, 50.3, 54.3,..., 50.3] (average duration in milliseconds)
      #   cluster.requests            # => [580, 569, 540, 524, 524, 525, 528, 520, 529, 527, 523,..., 524]             (requests in that second)
      #   cluster.methods(:get)       # => [400, 509, 340, 424, 324, 525, 528, 520, 529, 527, 523,..., 524]             (GET requests in that second)
      #   cluster.methods(:post)      # => [400, 509, 340, 424, 324, 525, 528, 520, 529, 527, 523,..., 524]             (POST requests in that second)
      #   cluster.statuses(200)       # => [400, 509, 340, 424, 324, 525, 528, 520, 529, 527, 523,..., 524]             (successful - requests in that second)
      #   cluster.statuses(404)       # => [400, 509, 340, 424, 324, 525, 528, 520, 529, 527, 523,..., 524]             (not founds - requests in that second)
      #   cluster.paths("/")          # => [400, 509, 340, 424, 324, 525, 528, 520, 529, 527, 523,..., 524]             (requests to a path in that second)
      #   cluster.paths("/home")      # => [400, 509, 340, 424, 324, 525, 528, 520, 529, 527, 523,..., 524]             (requests to "/home" path in that second)
      #   cluster.benchmark("index")  # => [400, 509, 340, 424, 324, 525, 528, 520, 529, 527, 523,..., 524]             (requests from the index benchmark in that second)
      #   cluster.benchmark("user")   # => [400, 509, 340, 424, 324, 525, 528, 520, 529, 527, 523,..., 524]             (requests from the user benchmark in that second)
      #
      #   # Duration was 120 seconds = 2 minutes
      #   cluster(1.minute).timesteps # => [0, 1]                                                                        (minutes)
      #   cluster(1.minute).durations # => [50.3, 51.2]                                                                  (average duration in milliseconds)
      #   cluster(1.minute).requests  # => [27836, 27684]                                                                (requests in that minute)
      #
      # NOTE(review): the Cluster class in this file only defines the methods, statuses and paths
      # readers; the benchmark examples above have no matching reader here - confirm.
      #
      # The cluster is cached per timebase and scope. An explicit extra_scope takes precedence
      # over the current environment scope.
      def cluster(timebase = 1.second, extra_scope = nil)
        cluster_scope = extra_scope || scope
        timebase_and_scope = [timebase.to_i, cluster_scope]
        @@clusters[timebase_and_scope] ||= Cluster.new(timebase, cluster_scope)
      end

      # Returns an array of appearing attributes in the selected tracking scope.
      #
      #   appearing.statuses # => [200, 404]
      #   appearing.methods  # => ["get", "post"]
      #   appearing.paths    # => ["/", "/basic/auth"]
      #
      # The result is cached per scope. The timebase argument is accepted but not used.
      def appearing(timebase = 1.second)
        @@appearings[scope] ||= Appearings.new(scope)
      end

      # Returns the default statistics for a given attribute. The following statistics are available:
      #
      #   statistics.durations.max                # => 78.2
      #   statistics.durations.min                # => 12.3
      #
      #   statistics.durations.mean               # => 45.2
      #   statistics.durations.average            # => 45.2
      #
      #   statistics.durations.standard_deviation # => 11.3
      #   statistics.durations.sd                 # => 11.3
      #
      #   statistics.durations.squared_deviation  # => 60.7
      #   statistics.durations.variance           # => 60.7
      #
      # The result is cached per timebase and scope.
      def statistics(timebase = 1.second)
        timebase_and_scope = [timebase.to_i, scope]
        @@statistics[timebase_and_scope] ||= replace_durations_with_raw_trackings(
          AttributeCluster.new(Statistics, :timebase => timebase, :scope => scope)
        )
      end

      # Returns a gaussian distribution for the specified attribute. It automatically calculates the necessary mean and variance values and adapts
      # the x values to fit the bell curve best.
      #
      #   normal_distribution.durations.x # => [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, ...]
      #   normal_distribution.durations.y # => [0, 0, 7, 8, 9, 10, 11, 10,  9,  8,  7,  4, ...]
      #
      # The result is cached per timebase and scope.
      #
      # NOTE(review): NormalDistribution derives mean, sd and x in its constructor and exposes y as
      # a computed method, so the @y assigned by the raw-duration replacement does not appear to
      # influence the curve - confirm whether the raw durations were meant to feed it.
      def normal_distribution(timebase = 1.second)
        timebase_and_scope = [timebase.to_i, scope]
        @@normal_distribution[timebase_and_scope] ||= replace_durations_with_raw_trackings(
          AttributeCluster.new(NormalDistribution, :timebase => timebase, :scope => scope)
        )
      end

      # Returns a polynomial regression of a degree, a derivation and a timebase. Possible options are:
      #
      # [:degree]     By default the degree is 1 which results in a linear regression. There's no limit to the degree.
      # [:derivation] By default the normal function, which means no derivation is returned. Currently only the first derivation is supported.
      # [:timebase]   By default the cluster size is 1.second. Any timelimit can be added here, e.g. 1.minute
      #
      #   # Return a linear regression for the durations, clustered by seconds
      #   polynomial_regression.durations.y
      #   polynomial_regression(:degree => 1, :timebase => 1.second).durations.y
      #   polynomial_regression(:degree => 1, :timebase => 1.second).durations.derivation(0)
      #
      #   # Return the first derivation of the linear regression for the durations, clustered by seconds
      #   polynomial_regression(:degree => 1).durations.derivation(1)
      #   polynomial_regression(:degree => 1, :timebase => 1.second).durations.derivation(1)
      #
      #   # Return a second degree polynomial regression for the durations, clustered by seconds
      #   polynomial_regression(:degree => 2).durations.derivation(0)
      #   polynomial_regression(:degree => 2, :timebase => 1.second).durations.derivation(0)
      #
      #   # Return the first derivation of the second degree polynomial regression for the durations, clustered by seconds
      #   polynomial_regression(:degree => 2).durations.derivation(1)
      #   polynomial_regression(:degree => 2, :timebase => 1.second).durations.derivation(1)
      #
      # The result is cached per degree, timebase and scope.
      def polynomial_regression(new_options = {})
        options = { :degree => 1, :timebase => 1.second }.merge(new_options)
        degree_and_timebase_and_scope = [options[:degree], options[:timebase].to_i, scope]
        @@regressions[degree_and_timebase_and_scope] ||= AttributeCluster.new(
          PolynomialRegression, :timebase => options[:timebase], :scope => scope, :degree => options[:degree]
        )
      end

      # Duration is the only value that shouldn't be clustered in the statistics because we have real float values in it, and do not only count +1
      # for the request. Because of this, we'll exchange the clustered durations y values with an array of all unclustered tracking durations
      # of the current scope. Returns the attribute cluster that was passed in.
      def replace_durations_with_raw_trackings(attribute_cluster)
        attribute_cluster.instance_eval do
          @durations.instance_eval do
            # trackings and scope resolve on the durations object, which includes Environment
            @y = trackings.map{ |tracking| tracking[:duration] if tracking[:benchmark] == scope || scope == :all }.compact
          end
        end
        attribute_cluster
      end
      private :replace_durations_with_raw_trackings

      # Calculates the default statistic values for a given attribute, like the duration, requests, etc.
      class Statistics
        include Environment

        attr_reader :x
        attr_reader :y

        # The degree is accepted so that all attribute classes share one constructor, it is not used here
        def initialize x, y, degree
          @x, @y = x, y
        end

        # The maximum of the attribute
        def max
          @y.max
        end

        # The minimum of the attribute
        def min
          @y.min
        end

        # The mean or average of the attribute
        def mean
          @y.average
        end
        alias_method :average, :mean

        # The standard deviation or sd of the attribute
        #
        # NOTE(review): this averages the absolute distances to the mean, it is not the square
        # root of the variance - confirm which definition is intended before changing it.
        def standard_deviation
          u = mean
          @y.inject(0){ |result, element| result + (element - u).abs }.to_f / @y.size.to_f
        end
        alias_method :sd, :standard_deviation

        # The squared deviation or variance of the attribute
        def squared_deviation
          u = mean
          @y.inject(0){ |result, element| result + (element - u) ** 2 }.to_f / @y.size.to_f
        end
        alias_method :variance, :squared_deviation
      end

      # Calculates a gaussian normal distribution with the mean and variance of the supplied y values
      class NormalDistribution
        include Environment

        attr_reader :mean
        attr_reader :sd
        attr_reader :x
        attr_reader :formula

        # The supplied x values and the degree are not used for the curve: x is rebuilt around the mean below
        def initialize x, y, degree
          @mean, @x = y.average, x
          # Same deviation measure as Statistics#standard_deviation (average absolute distance to the mean)
          @sd = y.inject(0){ |result, element| result + (element - @mean).abs }.to_f / y.size.to_f

          # Setup functions that map the gaussian normal distribution
          @distribution = lambda { |value| 1 / Math.sqrt(2 * Math::PI * @sd) * Math::E**( -0.5 * (value - @mean)**2 / @sd) }

          # Setup x to match the center mean: 120 evenly spaced values starting 6 deviations below the mean
          @x = []
          upper_limit = @mean + (6 * @sd)
          lower_limit = @mean - (6 * @sd)
          delta_limit = upper_limit - lower_limit
          steps       = 120
          step_size   = delta_limit / steps
          steps.times{ |step| @x << (step * step_size) + lower_limit }

          # Store formula for printing
          @formula = "1 / sqrt(2 * pi * #{@sd}) * e**( -0.5 * (x - #{@mean})**2 / #{@sd})"
        end

        # Returns an array with the distribution values, rounded to 3 digits, like this:
        #
        #   [0.0, 0.1, 1.2, 4.5, 10.8, 4.5, 1.2, 0.1, 0.0]
        #
        def y
          @x.map{ |value| @distribution.call(value).round(3) }
        end
      end

      # Clusters the trackings in the specified timebase. By default everything is clustered by seconds.
      class Cluster
        include Environment

        # Allows the registering of multi dimensioned attributes and 0s out values that aren't present
        #
        #   attr_multi_dimension_reader :methods
        #
        def self.attr_multi_dimension_reader(attribute)
          define_method(attribute) do |appearance|
            variable = instance_variable_get("@#{attribute}".to_sym)
            # Unknown appearances get a zero for every timestep
            variable.key?(appearance.to_s) ? variable[appearance.to_s] : timesteps.dup.fill(0)
          end
        end

        attr_reader :timesteps
        attr_reader :durations
        attr_reader :requests
        attr_multi_dimension_reader :methods
        attr_multi_dimension_reader :statuses
        attr_multi_dimension_reader :paths

        def initialize(timebase = 1.second, scope = :all)
          @timesteps, @durations, @durations_array, @requests, @methods, @statuses, @paths, @scope = [], [], [], [], {}, {}, {}, scope

          # Single dimension attributes
          steps = BigBench.config.duration.to_i / timebase
          (0..steps).each do |timestep|
            @timesteps[timestep]       = timestep
            @durations_array[timestep] = []
            @requests[timestep]        = 0
          end

          # Multi dimension attributes
          # NOTE(review): appearing uses the environment scope, not the scope argument of this
          # constructor - the two are only guaranteed to match when no extra scope was passed.
          [:methods, :statuses, :paths].each do |attribute|
            appearing.send(attribute).each do |appearance|
              variable = instance_variable_get("@#{attribute}".to_sym)
              variable[appearance.to_s] = []
              @timesteps.each { |timestep| variable[appearance.to_s][timestep] = 0 }
            end
          end

          # Cluster trackings
          trackings.each do |tracking|
            in_scope = scope == :all || tracking[:benchmark] == scope
            next unless in_scope

            timestep = tracking[:elapsed].to_i / timebase
            @durations_array[timestep]                  << tracking[:duration]
            @requests[timestep]                         += 1
            @methods[tracking[:method].to_s][timestep]  += 1
            @statuses[tracking[:status].to_s][timestep] += 1
            @paths[tracking[:path].to_s][timestep]      += 1
          end

          # Compute mean of durations
          @timesteps.each do |timestep|
            @durations[timestep] = @durations_array[timestep].average
          end
        end
      end

      # Lists the appearing attributes for a scope
      class Appearings
        include Environment

        attr_reader :methods
        attr_reader :statuses
        attr_reader :paths

        def initialize scope = :all
          @methods, @statuses, @paths = [], [], []

          trackings.each do |tracking|
            in_scope = scope == :all || tracking[:benchmark] == scope
            next unless in_scope

            # Add appearing attributes
            @methods  << tracking[:method]
            @statuses << tracking[:status]
            @paths    << tracking[:path]
          end

          # Unique attributes
          @methods.uniq!
          @statuses.uniq!
          @paths.uniq!
        end
      end

      # This class performs the actual regression for a specified degree and timebase. As x it returns the timebase
      # values for the corresponding timebase - e.g. the seconds - and as y it returns the corresponding regression
      # values. Additionally it offers the derivations for the regressions with the derivation method.
      class PolynomialRegression

        # An array with the seconds in the timebase
        #
        #   [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
        #
        attr_reader :x
        attr_reader :degree

        # Returns the coefficients calculated for the regression and the degree like
        #
        #   [3.428734, 1.176235]
        #
        attr_reader :coefficients

        # Raises a RuntimeError if x holds one value or less
        def initialize x, y, degree
          @x, @degree, @derivations, @formulas = x, degree, [], []
          raise "Regression is not possible for a single time value, choose a smaller timebase" if @x.size <= 1

          # Perform regression: one row per x value holding x**0 up to x**degree
          x_data = x.map { |xi| (0..degree).map { |pow| (xi**pow).to_f } }
          mx = ::Matrix[*x_data]
          my = ::Matrix.column_vector(y)

          # Calculate coefficients
          @coefficients = ((mx.t * mx).inv * mx.t * my).transpose.to_a[0]

          # Setup functions that map the actual polynom and its first derivation
          @derivations << lambda { |value| (0..@degree).to_a.inject(0) { |result, d| result + (@coefficients[d] * value**d) }}
          @derivations << lambda { |value| (1..@degree).to_a.inject(0) { |result, d| result + (d * (@coefficients[d] * value**(d-1))) }}

          # Store formulas for printing
          @formulas << (0..@degree).to_a.map { |d| d == 0 ? @coefficients[d] : "#{@coefficients[d]}x^#{d}" }
          @formulas << (1..@degree).to_a.map { |d| d == 1 ? @coefficients[d] : "#{d}*#{@coefficients[d]}x^#{d-1}" }
        end

        # Returns an array with the computed y-values for the corresponding derivation. The default derivation is the
        # 0. derivation which is the original regression that is equal to the y method. The result looks like this:
        #
        #   [2.5, 2.6, 2.7, 2.8, 3.0, 3.2, 3.4, 3.8, 4.0, 4.9]
        #
        def derivation(derivation = 0)
          @x.map{ |value| @derivations[derivation].call(value) }
        end

        # Returns an array with the regression values like this:
        #
        #   [2.5, 2.6, 2.7, 2.8, 3.0, 3.2, 3.4, 3.8, 4.0, 4.9]
        #
        def y
          derivation(0)
        end

        # Returns the printed formula of this polynom
        def formula(derivation = 0)
          @formulas[derivation].join " + "
        end
      end

      # Creates attribute clusters for all available attributes
      class AttributeCluster
        include Environment

        # Wraps every attribute of the cluster for the given timebase and scope into an instance of klass
        def initialize klass, options = {}
          @options = { :degree => 0, :timebase => 1.second, :scope => :all }.merge(options)
          @degree, @klass, @cluster = @options[:degree], klass, cluster(@options[:timebase], @options[:scope])

          cluster_attribute :durations
          cluster_attribute :requests
          cluster_attribute_with_options :methods
          cluster_attribute_with_options :statuses
          cluster_attribute_with_options :paths
          # NOTE(review): this reader forwards to @cluster.benchmarks on first use, but the Cluster
          # class in this file defines no such method - confirm.
          cluster_attribute_with_options :benchmarks
        end

        private

        # Allows the attribute reader definition for attributes without options like this:
        #
        #   cluster_attribute :durations
        #
        def cluster_attribute(name)
          attribute_symbol = "@#{name}".to_sym
          instance_variable_set(attribute_symbol, @klass.new(@cluster.timesteps, @cluster.send(name.to_sym), @degree))
          self.class.class_eval do
            attr_reader name
          end
        end

        # Allows the attribute reader definition for attributes with options like this:
        #
        #   cluster_attribute_with_options :statuses
        #
        # The wrapped value is built on first access for an option and memoized per option afterwards.
        def cluster_attribute_with_options(name)
          attribute_symbol = "@#{name}".to_sym
          instance_variable_set attribute_symbol, {}
          self.class.class_eval do
            define_method(name) do |option|
              attribute = instance_variable_get(attribute_symbol)
              attribute[option.to_s] ||= @klass.new(@cluster.timesteps, @cluster.send(name.to_sym, option), @degree)
            end
          end
        end
      end
    end
  end
end