require 'rubyvis'
module Statsample
  module Graph
    # = Boxplot
    #
    # From Wikipedia:
    # In descriptive statistics, a box plot or boxplot (also known as a
    # box-and-whisker diagram or plot) is a convenient way of graphically
    # depicting groups of numerical data through their five-number summaries:
    # the smallest observation (sample minimum), lower quartile (Q1),
    # median (Q2), upper quartile (Q3), and largest observation
    # (sample maximum). A boxplot may also indicate which observations,
    # if any, might be considered outliers.
    #
    # == Usage
    # === Svg output
    #  a=[1,2,3,4].to_numeric
    #  b=[3,4,5,6].to_numeric
    #  puts Statsample::Graph::Boxplot.new(:vectors=>[a,b]).to_svg
    # === Using ReportBuilder
    #  a=[1,2,3,4].to_numeric
    #  b=[3,4,5,6].to_numeric
    #  rb=ReportBuilder.new
    #  rb.add(Statsample::Graph::Boxplot.new(:vectors=>[a,b]))
    #  rb.save_html('boxplot.html')
    class Boxplot
      include Summarizable
      # Name of the plot; used as the section name in reports
      attr_accessor :name
      # Total width of Boxplot
      attr_accessor :width
      # Total height of Boxplot
      attr_accessor :height
      # Top margin
      attr_accessor :margin_top
      # Bottom margin
      attr_accessor :margin_bottom
      # Left margin
      attr_accessor :margin_left
      # Right margin
      attr_accessor :margin_right
      # Array assigning each vector to a group.
      # For example, for four vectors,
      #   boxplot.groups=[1,2,1,3]
      # assigns the same color to the first and third boxes, and different
      # colors to the second and fourth.
      attr_accessor :groups
      # Minimum value on y-axis. Automatically defined from data when nil
      attr_accessor :minimum
      # Maximum value on y-axis. Automatically defined from data when nil
      attr_accessor :maximum
      # Vectors to plot, one box per vector
      attr_accessor :vectors
      # The rotation angle of the labels, in radians. Text is rotated
      # clockwise relative to the anchor location. For example, with the
      # default left alignment, an angle of Math::PI / 2 causes text to
      # proceed downwards. The default angle is zero.
      attr_accessor :label_angle
      # Scales built by the most recent call to #rubyvis_panel.
      # Both are nil until a panel has been built.
      attr_reader :x_scale, :y_scale

      # Create a new Boxplot.
      # Parameters: Hash of options
      # * :vectors: Array of vectors (required)
      # * :groups: Array of same size as :vectors:, with name of groups
      #   to colorize vectors
      # Every other accessor of this class (:name, :width, :height,
      # :margin_top, :margin_bottom, :margin_left, :margin_right, :minimum,
      # :maximum, :label_angle) can also be given as an option.
      #
      # Raises RuntimeError if :vectors is missing.
      def initialize(opts=Hash.new)
        # Work on a copy so the caller's hash is not stripped of :vectors
        opts = opts.dup
        @vectors = opts.delete(:vectors)
        raise "You should define vectors" if @vectors.nil?
        opts_default = {
          :name=>_("Boxplot"),
          :groups=>nil,
          :width=>400,
          :height=>300,
          :margin_top=>10,
          :margin_bottom=>20,
          :margin_left=>20,
          :margin_right=>20,
          :minimum=>nil,
          :maximum=>nil,
          :label_angle=>0
        }
        @opts = opts_default.merge(opts)
        # Only the known options are applied; unknown keys stay in @opts
        opts_default.keys.each { |k| send("#{k}=", @opts[k]) }
      end

      # Returns a Rubyvis panel with the boxplot.
      # As a side effect, stores the scales used in x_scale and y_scale.
      def rubyvis_panel # :nodoc:
        # Rubyvis evaluates property blocks in the context of the mark,
        # so the plot itself has to be reached through a local.
        that = self

        # Axis bounds: explicit values win, otherwise taken from the data
        min, max = @minimum, @maximum
        min ||= @vectors.map { |v| v.min }.min
        max ||= @vectors.map { |v| v.max }.max

        margin_hor = margin_left + margin_right
        margin_vert = margin_top + margin_bottom

        @x_scale = pv.Scale.ordinal(@vectors.size.times.map.to_a).split_banded(0, width - margin_hor, 4.0 / 5)
        @y_scale = Rubyvis::Scale.linear(min, max).range(0, height - margin_vert)
        @y_scale.nice
        # Local aliases: inside the mark blocks below, self is not this object
        x_scale = @x_scale
        y_scale = @y_scale

        colors = Rubyvis::Colors.category10
        # cache data
        data = @vectors.map { |v| box_summary(v) }

        vis = Rubyvis::Panel.new do |pan|
          pan.width(width - margin_hor)
          pan.height(height - margin_vert)
          pan.bottom margin_bottom
          pan.left margin_left
          pan.right margin_right
          pan.top margin_top

          # Y axis
          pan.rule do
            data y_scale.ticks
            bottom y_scale
            stroke_style {|d| d!=0 ? "#eee" : "#000"}
            label(:anchor=>'left') do
              text y_scale.tick_format
            end
          end
          pan.rule do
            bottom 0
            stroke_style 'black'
          end

          # Labels
          pan.label do |l|
            l.data data
            l.text_angle that.label_angle
            l.left {|v| x_scale[index] }
            l.bottom(-15)
            l.text {|v,x| v[:name]}
          end

          pan.panel do |bp|
            bp.data data
            bp.left {|v| x_scale[index]}
            bp.width x_scale.range_band

            # Bar (the box between the first and third quartile)
            bp.bar do |b|
              b.bottom {|v| y_scale[v[:percentil_25]]}
              b.height {|v| y_scale[v[:percentil_75]] - y_scale[v[:percentil_25]] }
              b.line_width 1
              b.stroke_style {|v|
                if that.groups
                  colors.scale(that.groups[parent.index]).darker
                else
                  colors.scale(index).darker
                end
              }
              b.fill_style {|v|
                if that.groups
                  colors.scale(that.groups[parent.index])
                else
                  colors.scale(index)
                end
              }
            end

            # Median
            bp.rule do |r|
              r.bottom {|v| y_scale[v[:median]]}
              r.width x_scale.range_band
              r.line_width 2
            end

            ##
            # Whiskers
            ##

            # Low whisker: cap, then stem up to the box
            bp.rule do |r|
              r.visible {|v| v[:percentil_25] > v[:low_whisker]}
              r.bottom {|v| y_scale[v[:low_whisker]]}
            end
            bp.rule do |r|
              r.visible {|v| v[:percentil_25] > v[:low_whisker]}
              r.bottom {|v| y_scale[v[:low_whisker]]}
              r.left {|v| x_scale.range_band / 2.0}
              r.height {|v| y_scale.scale(v[:percentil_25]) - y_scale.scale(v[:low_whisker])}
            end

            # High whisker: cap, then stem up from the box
            bp.rule do |r|
              r.visible {|v| v[:percentil_75] < v[:high_whisker]}
              r.bottom {|v| y_scale.scale(v[:high_whisker])}
            end
            bp.rule do |r|
              r.visible {|v| v[:percentil_75] < v[:high_whisker]}
              r.bottom {|v| y_scale.scale(v[:percentil_75])}
              r.left {|v| x_scale.range_band / 2.0}
              r.height {|v| y_scale.scale(v[:high_whisker]) - y_scale.scale(v[:percentil_75])}
            end

            # Outliers
            bp.dot do |dot|
              dot.shape_size 4
              dot.data {|v| v[:outliers]}
              dot.left {|v| x_scale.range_band / 2.0}
              dot.bottom {|v| y_scale.scale(v)}
              dot.title {|v| v}
            end
          end
        end
        vis
      end

      # Returns SVG with the boxplot
      def to_svg
        rp = rubyvis_panel
        rp.render
        rp.to_svg
      end

      def report_building(builder) # :nodoc:
        builder.section(:name=>name) do |b|
          b.image(to_svg, :type=>'svg', :width=>width, :height=>height)
        end
      end

      private

      # Builds the hash of values needed to draw the box of one vector:
      # quartiles, median, name, interquartile range, whisker ends and
      # the observations lying beyond the whiskers.
      def box_summary(v)
        out = {
          :percentil_25=>v.percentil(25),
          :median=>v.median,
          :percentil_75=>v.percentil(75),
          :name=>v.name
        }
        out[:iqr] = out[:percentil_75] - out[:percentil_25]

        # Fences sit one interquartile range beyond each quartile
        upper_fence = out[:percentil_75] + out[:iqr]
        lower_fence = out[:percentil_25] - out[:iqr]

        # Find the most extreme observations strictly inside the fences.
        # Whiskers start at the quartiles, so they never end inside the box.
        low = out[:percentil_25]
        high = out[:percentil_75]
        v.each do |d|
          next if d.nil? # nil cannot be compared with the fences
          low = d if d < low && d > lower_fence
          high = d if d > high && d < upper_fence
        end

        # Whiskers!
        out[:low_whisker] = low
        out[:high_whisker] = high
        # And now, data outside whiskers (nil entries are not observations)
        out[:outliers] = v.data_with_nils.find_all { |d| !d.nil? && (d < low || d > high) }
        out
      end
    end
  end
end