module Statsample
  # Class to create a crosstab (contingency table) of data.
  # With this, you can create reports and do a chi square test.
  # The first vector will be the rows and the second will be the columns.
  class Crosstab
    include Summarizable

    attr_reader :v_rows, :v_cols
    attr_accessor :row_label, :column_label, :name, :percentage_row,
                  :percentage_column, :percentage_total

    # Builds a crosstab from two vectors of equal size.
    #
    # v1:: vector used for the rows (must respond to +size+ and +name+)
    # v2:: vector used for the columns (must respond to +size+ and +name+)
    # opts:: Hash of attribute => value; each pair is assigned only when the
    #        object exposes a matching writer (e.g. :name, :percentage_row).
    #
    # Raises ArgumentError when the vectors differ in size.
    def initialize(v1, v2, opts = {})
      raise ArgumentError, "Vectors should be the same size" unless v1.size == v2.size

      @v_rows, @v_cols = Statsample.only_valid_clone(
        Daru::Vector.new(v1),
        Daru::Vector.new(v2)
      )
      @cases = @v_rows.size
      @row_label = v1.name
      @column_label = v2.name
      @name = nil
      @percentage_row = @percentage_column = @percentage_total = false
      opts.each do |k, v|
        # Check for the writer, not the reader: a key such as :v_rows has a
        # reader but no writer and must be skipped instead of raising.
        setter = "#{k}="
        send(setter, v) if respond_to?(setter)
      end
      @name ||= _("Crosstab %s - %s") % [@row_label, @column_label]
    end

    # Sorted factors of the row vector, with the index reset.
    def rows_names
      @v_rows.factors.sort.reset_index!
    end

    # Sorted factors of the column vector, with the index reset.
    def cols_names
      @v_cols.factors.sort.reset_index!
    end

    # Frequencies of the row vector, converted with +to_h+.
    def rows_total
      @v_rows.frequencies.to_h
    end

    # Frequencies of the column vector, converted with +to_h+.
    def cols_total
      @v_cols.frequencies.to_h
    end

    # Hash keyed by [row, col] pairs. Every combination of row and column
    # names starts at 0 and is then overwritten by the observed frequencies.
    def frequencies
      cn = cols_names
      base = rows_names.each_with_object({}) do |row, acc|
        cn.each { |col| acc[[row, col]] = 0 }
      end
      pairs = Statsample.vector_cols_matrix(@v_rows, @v_cols).to_a
      base.update(Daru::Vector.new(pairs).frequencies.to_h)
    end

    # Frequencies arranged as a Matrix: one matrix row per row name and one
    # matrix column per column name.
    def to_matrix
      f = frequencies
      rn = rows_names
      cn = cols_names
      Matrix.rows(rn.map { |row| cn.map { |col| f[[row, col]] } })
    end

    # Nested Hash: row name => { column name => frequency }.
    def frequencies_by_row
      f = frequencies
      cn = cols_names
      rows_names.each_with_object({}) do |row, by_row|
        by_row[row] = cn.each_with_object({}) { |col, cells| cells[col] = f[[row, col]] }
      end
    end

    # Nested Hash: column name => { row name => frequency }.
    def frequencies_by_col
      f = frequencies
      rn = rows_names
      cols_names.each_with_object({}) do |col, by_col|
        by_col[col] = rn.each_with_object({}) { |row, cells| cells[row] = f[[row, col]] }
      end
    end

    # Chi square, based on expected and real matrix
    def chi_square
      # Loaded lazily, only when the test is actually requested.
      require 'statsample/test'
      Statsample::Test.chi_square(to_matrix, matrix_expected)
    end

    # Matrix of expected frequencies (row total * column total / cases).
    # Useful to obtain chi square
    def matrix_expected
      rn = rows_names
      cn = cols_names
      rt = rows_total
      ct = cols_total
      t = @v_rows.size
      m = rn.map do |row|
        cn.map { |col| (rt[row] * ct[col]).quo(t) }
      end
      Matrix.rows(m)
    end

    # Hash with every column name mapped to 0.
    def cols_empty_hash
      cols_names.each_with_object({}) { |col, acc| acc[col] = 0 }
    end

    # Adds a section to +builder+ containing the chi square value, the
    # labels, the raw frequency table with totals and, when the matching
    # percentage_* flag is set, the row/column/total percentage tables.
    def report_building(builder)
      builder.section(:name => @name) do |generator|
        fq = frequencies
        rn = rows_names
        cn = cols_names
        total = 0
        total_cols = cols_empty_hash
        generator.text "Chi Square: #{chi_square}"
        generator.text(_("Rows: %s") % @row_label) unless @row_label.nil?
        generator.text(_("Columns: %s") % @column_label) unless @column_label.nil?

        header = [""] + cn.map { |c| @v_cols.index_of(c) } + [_("Total")]
        t = ReportBuilder::Table.new(:name => @name + " - " + _("Raw"), :header => header)

        rn.each do |row|
          total_row = 0
          t_row = [@v_rows.index_of(row)]
          cn.each do |col|
            data = fq[[row, col]]
            total_row += data
            total += data
            total_cols[col] += data
            t_row.push(data)
          end
          t_row.push(total_row)
          t.row(t_row)
        end

        t.hr
        t_row = [_("Total")]
        cn.each { |col| t_row.push(total_cols[col]) }
        t_row.push(total)
        t.row(t_row)
        generator.parse_element(t)

        table_percentage(generator, :row) if @percentage_row
        table_percentage(generator, :column) if @percentage_column
        table_percentage(generator, :total) if @percentage_total
      end
    end

    # Emits a percentage table through +generator+.
    #
    # type:: :row (cells relative to the row total), :column (cells relative
    #        to the column total) or :total (cells relative to all cases).
    def table_percentage(generator, type)
      fq = frequencies
      cn = cols_names
      rn = rows_names
      rt = rows_total
      ct = cols_total

      type_name = case type
                  when :row    then _("% Row")
                  when :column then _("% Column")
                  when :total  then _("% Total")
                  end

      header = [""] + cn.map { |c| @v_cols.index_of(c) } + [_("Total")]
      # type_name already went through _(); the second call is kept as in
      # the original so the generated table name does not change.
      t = ReportBuilder::Table.new(:name => @name + " - " + _(type_name), :header => header)

      rn.each do |row|
        t_row = [@v_rows.index_of(row)]
        cn.each do |col|
          total = case type
                  when :row    then rt[row]
                  when :column then ct[col]
                  when :total  then @cases
                  end
          t_row.push(format("%0.2f%%", fq[[row, col]] * 100.0 / total))
        end
        # Right-hand "Total" cell: the row total against its own base.
        total = case type
                when :row    then rt[row]
                when :column then @cases
                when :total  then @cases
                end
        t_row.push(format("%0.2f%%", rt[row] * 100.0 / total))
        t.row(t_row)
      end

      t.hr
      t_row = [_("Total")]
      cn.each do |col|
        # Bottom "Total" row: the column total against its own base.
        total = case type
                when :row    then @cases
                when :column then ct[col]
                when :total  then @cases
                end
        t_row.push(format("%0.2f%%", ct[col] * 100.0 / total))
      end
      t_row.push("100%")
      t.row(t_row)
      generator.parse_element(t)
    end
  end
end