module Statsample # A histogram consists of a set of bins which count the # number of events falling into a given range of a continuous variable x. # # This implementations follows convention of GSL # for specification. # # * Verbatim: * # # The range for bin[i] is given by range[i] to range[i+1]. # For n bins there are n+1 entries in the array range. # Each bin is inclusive at the lower end and exclusive at the upper end. # Mathematically this means that the bins are defined # by the following inequality, # # bin[i] corresponds to range[i] <= x < range[i+1] # # Here is a diagram of the correspondence between ranges and bins # on the number-line for x, # # # [ bin[0] )[ bin[1] )[ bin[2] )[ bin[3] )[ bin[4] ) # ---|---------|---------|---------|---------|---------|--- x # r[0] r[1] r[2] r[3] r[4] r[5] # # # In this picture the values of the range array are denoted by r. # On the left-hand side of each bin the square bracket ‘[’ denotes # an inclusive lower bound ( r <= x), and the round parentheses ‘)’ # on the right-hand side denote an exclusive upper bound (x < r). # Thus any samples which fall on the upper end of the histogram are # excluded. # If you want to include this value for the last bin you will need to # add an extra bin to your histogram. # # # == Reference: # * http://www.gnu.org/software/gsl/manual/html_node/The-histogram-struct.html class Histogram include Enumerable class << self # Alloc +n_bins+, using +range+ as ranges of bins def alloc(n_bins, range=nil, opts=Hash.new) Histogram.new(n_bins, range, opts) end # Alloc +n_bins+ bins, using +p1+ as minimum and +p2+ # as maximum def alloc_uniform(n_bins, p1=nil,p2=nil) if p1.is_a? Array min,max=p1 else min,max=p1,p2 end range=max - min step=range / n_bins.to_f range=(n_bins+1).times.map {|i| min + (step*i)} Histogram.new(range) end end attr_accessor :name attr_reader :bin attr_reader :range include GetText bindtextdomain("statsample") def initialize(p1, min_max=false, opts=Hash.new) if p1.is_a? Array range=p1 @n_bins=p1.size-1 elsif p1.is_a? Integer @n_bins=p1 end @bin=[0.0]*(@n_bins) if(min_max) min, max=min_max[0], min_max[1] range=Array.new(@n_bins+1) (@n_bins+1).times {|i| range[i]=min+(i*(max-min).quo(@n_bins)) } end range||=[0.0]*(@n_bins+1) set_ranges(range) @name="" opts.each{|k,v| self.send("#{k}=",v) if self.respond_to? k } end # Number of bins def bins @n_bins end def increment(x, w=1) if x.respond_to? :each x.each{|y| increment(y,w) } elsif x.is_a? Numeric (range.size - 1).times do |i| if x >= range[i] and x < range[i+1] @bin[i] += w break end end end end def set_ranges(range) raise "Range size should be bin+1" if range.size!=@bin.size+1 @range=range end def get_range(i) [@range[i],@range[i+1]] end def max @range.last end def min @range.first end def max_val @bin.max end def min_val @bin.min end def each bins.times.each do |i| r=get_range(i) arg={:i=>i, :low=>r[0],:high=>r[1], :middle=>(r[0]+r[1]) / 2.0, :value=>@bin[i]} yield arg end end def estimated_variance sum,n=0,0 mean=estimated_mean each do |v| sum+=v[:value]*(v[:middle]-mean)**2 n+=v[:value] end sum / (n-1) end def estimated_standard_deviation Math::sqrt(estimated_variance) end def estimated_mean sum,n=0,0 each do |v| sum+= v[:value]* v[:middle] n+=v[:value] end sum / n end alias :mean :estimated_mean alias :sigma :estimated_standard_deviation def sum(start=nil,_end=nil) start||=0 _end||=@n_bins-1 (start.._end).inject(0) {|ac,i| ac+@bin[i]} end def report_building(generator) hg=Statsample::Graph::Histogram.new(self) generator.parse_element(hg) end def report_building_text(generator) @range.each_with_index do |r,i| next if i==@bin.size generator.text(sprintf("%5.2f : %d", r, @bin[i])) end end end end