# -*- coding: utf-8 -*-

class RailsDataExplorer
  class Chart

    # Responsibilities:
    #  * Render a group of box plots for bivariate analysis of a categorical
    #    and a numerical data series. One box plot is rendered for each
    #    distinct categorical value.
    #
    # Collaborators:
    #  * DataSet
    #
    # Resources:
    #  * http://bl.ocks.org/jensgrubert/7789216
    #  * http://www.datavizcatalogue.com/methods/box_plot.html#.U0S8Ra1dUyE
    #  * http://mbostock.github.io/protovis/ex/box-and-whisker.html
    #  * http://bl.ocks.org/mbostock/4061502
    #  * http://johan.github.io/d3/ex/box.html
    #  * http://www.stata.com/support/faqs/graphics/gph/graphdocs/horizontal-box-plot-of-variable-by-values-of-categorical-variable/
    class BoxPlotGroup < Chart

      # @param _data_set [#data_series] source of the data series that are
      #   inspected for chart roles in #compute_chart_attrs.
      # @param options [Hash] stored as a shallow copy, so later changes to
      #   the caller's hash do not leak into this chart.
      def initialize(_data_set, options = {})
        @data_set = _data_set
        @options = {}.merge(options)
      end

      # Groups the numerical (x) series by the distinct values of the
      # categorical (y) series and computes the attributes for rendering.
      #
      # The first series with an :x or :any role becomes the numerical
      # series; the first *other* series with a :y or :any role becomes the
      # categorical series.
      #
      # @return [Hash, false] false if no such pair of series exists,
      #   otherwise a Hash with the keys :values (one Array of numerical
      #   values per category, in label order), :category_labels, :min, :max,
      #   :base_width, :base_height, :axis_tick_format, :num_box_plots and
      #   :axis_scale.
      def compute_chart_attrs
        x_ds = candidates_for_role(:x).first
        y_ds = (candidates_for_role(:y) - [x_ds]).first
        return false if x_ds.nil? || y_ds.nil?

        # One (initially empty) bucket per distinct category.
        category_vals = y_ds.uniq_vals
        grouped = category_vals.each_with_object({}) { |cat, memo| memo[cat] = [] }

        # Distribute the numerical values into their category's bucket. Rows
        # without a usable category (nil or NaN) are ignored.
        x_values = x_ds.values
        y_ds.values.each_with_index do |y_val, idx|
          next if missing_category?(y_val)
          grouped[y_val] << x_values[idx]
        end

        y_sorted_keys = category_vals.sort(
          &y_ds.label_sorter(nil, lambda { |a, b| a <=> b })
        )
        sorted_values = y_sorted_keys.map { |cat| grouped[cat] }

        # Compute min and max values based on the interquartile range of each
        # box plot. Objective is to normalize box plots so that the widest
        # chart uses almost the entire space available.
        global_min = Float::INFINITY
        global_max = -Float::INFINITY
        sorted_values.each do |x_vals|
          local_min, local_max = whisker_bounds(x_vals)
          # Categories without any numerical value can't widen the range.
          next if local_min.nil?
          global_min = [global_min, local_min].min
          global_max = [global_max, local_max].max
        end

        {
          values: sorted_values,
          category_labels: y_sorted_keys,
          min: global_min,
          max: global_max,
          base_width: 100,
          base_height: 960,
          axis_tick_format: x_ds.axis_tick_format,
          num_box_plots: y_ds.uniq_vals_count,
          axis_scale: DataSeries.new('_', [global_min, global_max]).axis_scale(:d3)
        }
      end

      # Data series whose chart roles for this chart include +role+ or :any.
      def candidates_for_role(role)
        @data_set.data_series.select { |ds|
          (ds.chart_roles[Chart::BoxPlotGroup] & [role, :any]).any?
        }
      end

      # True for category values that can't be used as a grouping key.
      # NaN never compares equal to anything (itself included), so it has to
      # be detected via #nan? rather than `==`.
      def missing_category?(val)
        val.nil? || (val.respond_to?(:nan?) && val.nan?)
      end

      # Lower and upper whisker ends for a single box plot: the data's
      # min/max, clipped to 1.5 times the interquartile range beyond the
      # first/third quartile.
      #
      # @return [Array(Numeric, Numeric), nil] nil if +x_vals+ holds no
      #   non-nil values.
      def whisker_bounds(x_vals)
        present = x_vals.compact
        return nil if present.empty?

        lowest, highest = present.minmax
        stats = DataSeries.new('_', present).descriptive_statistics
        # Use min and max if the quartiles are unavailable, e.g. for very
        # small data series with only one or two entries.
        q1 = quartile_value(stats, '25%ile') || lowest
        q3 = quartile_value(stats, '75%ile') || highest
        reach = (q3 - q1) * 1.5
        [[lowest, q1 - reach].max, [highest, q3 + reach].min]
      end

      # Value of the descriptive statistic labelled +label+, or nil if the
      # entry is missing or has no value.
      def quartile_value(stats, label)
        entry = stats.detect { |e| label == e[:label] }
        entry && entry[:value]
      end

      # Explicit-name form on purpose: it hides only these helpers and does
      # not change the visibility of methods defined later in the class.
      private :candidates_for_role, :missing_category?, :whisker_bounds,
              :quartile_value

    end
  end
end