module Statsample
  # Class to create a crosstab (contingency table) of data.
  # With this, you can create reports and do a chi square test.
  # The first vector will be at the rows and the second will be at the columns.
  #
  class Crosstab
    include GetText
    bindtextdomain("statsample")
    attr_reader :v_rows, :v_cols
    attr_accessor :row_label, :column_label, :name, :percentage_row, :percentage_column, :percentage_total

    # Builds a crosstab from two vectors of the same size.
    #
    # v1::   Statsample::Vector used for the rows.
    # v2::   Statsample::Vector used for the columns.
    # opts:: Hash of attribute values; every key that has a public
    #        writer (+key=+) on this object is assigned, other keys
    #        are ignored.
    #
    # Raises ArgumentError when an argument is not a Statsample::Vector
    # or when the vectors differ in size.
    def initialize(v1, v2, opts = Hash.new)
      unless v1.is_a?(Statsample::Vector) && v2.is_a?(Statsample::Vector)
        raise ArgumentError, "Both arguments should be Vectors"
      end
      raise ArgumentError, "Vectors should be the same size" unless v1.size == v2.size
      # NOTE(review): only_valid presumably drops cases that are missing
      # on either vector -- confirm against Statsample.only_valid.
      @v_rows, @v_cols = Statsample.only_valid(v1, v2)
      @cases = @v_rows.size
      @row_label = nil
      @column_label = nil
      @name = nil
      @percentage_row = @percentage_column = @percentage_total = false
      opts.each do |key, value|
        # Test for the writer itself: checking the reader would try to
        # call a missing setter for read-only names such as :v_rows.
        setter = "#{key}="
        send(setter, value) if respond_to?(setter)
      end
      if @name.nil?
        if !@row_label.nil? && !@column_label.nil?
          @name = _("Crosstab %s - %s") % [@row_label, @column_label]
        else
          @name = _("Crosstab")
        end
      end
    end

    # Sorted factors of the row vector.
    def rows_names
      @v_rows.factors.sort
    end

    # Sorted factors of the column vector.
    def cols_names
      @v_cols.factors.sort
    end

    # Frequencies of the row vector (indexed by row value elsewhere in
    # this class).
    def rows_total
      @v_rows.frequencies
    end

    # Frequencies of the column vector (indexed by column value elsewhere
    # in this class).
    def cols_total
      @v_cols.frequencies
    end

    # Hash of cell counts keyed by [row, col]. Every combination of
    # rows_names and cols_names is present; combinations that are not
    # reported by the observed frequencies keep a count of 0.
    def frequencies
      base = {}
      rows_names.product(cols_names).each { |pair| base[pair] = 0 }
      base.update(Statsample::vector_cols_matrix(@v_rows, @v_cols).to_a.to_vector.frequencies)
    end

    # Matrix of cell counts, rows and columns ordered as rows_names and
    # cols_names.
    def to_matrix
      f = frequencies
      cn = cols_names
      Matrix.rows(rows_names.collect { |row| cn.collect { |col| f[[row, col]] } })
    end

    # Nested hash of cell counts: row => { col => count }.
    def frequencies_by_row
      f = frequencies
      cn = cols_names
      rows_names.inject({}) do |by_row, row|
        by_row[row] = cn.inject({}) { |counts, col| counts[col] = f[[row, col]]; counts }
        by_row
      end
    end

    # Nested hash of cell counts: col => { row => count }.
    def frequencies_by_col
      f = frequencies
      rn = rows_names
      cols_names.inject({}) do |by_col, col|
        by_col[col] = rn.inject({}) { |counts, row| counts[row] = f[[row, col]]; counts }
        by_col
      end
    end

    # Chi square, based on expected and real matrix
    def chi_square
      # Loaded on demand, only when the test is actually requested.
      require 'statsample/test'
      Statsample::Test.chi_square(self.to_matrix, matrix_expected)
    end

    # Useful to obtain chi square.
    # Each cell is (row total * column total) / number of cases.
    def matrix_expected
      cn = cols_names
      rt = rows_total
      ct = cols_total
      t = @v_rows.size
      m = rows_names.collect do |row|
        cn.collect { |col| (rt[row] * ct[col]).quo(t) }
      end
      Matrix.rows(m)
    end

    # Hash with every column name mapped to 0.
    def cols_empty_hash
      cols_names.inject({}) { |a, x| a[x] = 0; a }
    end

    # Writes the crosstab to +generator+: a table-of-contents entry, the
    # chi square value, the optional row/column labels, the table of raw
    # counts with totals, and one percentage table for each enabled
    # percentage_* flag.
    def to_reportbuilder(generator)
      generator.add_toc_entry(_("Crosstab: ") + name)
      generator.add_html("\n" + _("Crosstab") + " #{@name}")
      fq = frequencies
      rn = rows_names
      cn = cols_names
      row_sums, col_sums, grand_total = marginal_sums(fq, rn, cn)
      generator.add_text "Chi Square: #{chi_square}"
      generator.add_text(_("Rows: %s") % @row_label) unless @row_label.nil?
      generator.add_text(_("Columns: %s") % @column_label) unless @column_label.nil?
      t = ReportBuilder::Table.new(:name => @name + " - " + _("Raw"), :header => table_header(cn))
      rn.each do |row|
        cells = cn.collect { |col| fq[[row, col]] }
        t.add_row([@v_rows.labeling(row)] + cells + [row_sums[row]])
      end
      t.add_horizontal_line
      t.add_row([_("Total")] + cn.collect { |col| col_sums[col] } + [grand_total])
      generator.parse_element(t)
      table_percentage(generator, :row) if @percentage_row
      table_percentage(generator, :column) if @percentage_column
      table_percentage(generator, :total) if @percentage_total
      generator.add_html("\n")
    end

    # Adds a percentage table to +generator+.
    #
    # type:: :row (cell / row total), :column (cell / column total) or
    #        :total (cell / number of cases).
    #
    # Raises ArgumentError for any other +type+.
    def table_percentage(generator, type)
      type_name = case type
                  when :row then _("% Row")
                  when :column then _("% Column")
                  when :total then _("% Total")
                  else raise ArgumentError, "Unknown percentage type: #{type.inspect}"
                  end
      fq = frequencies
      cn = cols_names
      rn = rows_names
      rt = rows_total
      ct = cols_total
      # type_name is already translated; it is not passed through _() again.
      t = ReportBuilder::Table.new(:name => @name + " - " + type_name, :header => table_header(cn))
      rn.each do |row|
        cells = cn.collect do |col|
          base = case type
                 when :row then rt[row]
                 when :column then ct[col]
                 else @cases
                 end
          format_percentage(fq[[row, col]], base)
        end
        # The row marginal is relative to itself for :row and to the
        # number of cases otherwise.
        row_base = type == :row ? rt[row] : @cases
        t.add_row([@v_rows.labeling(row)] + cells + [format_percentage(rt[row], row_base)])
      end
      t.add_horizontal_line
      # The column marginal is relative to itself for :column and to the
      # number of cases otherwise.
      footer = cn.collect do |col|
        format_percentage(ct[col], type == :column ? ct[col] : @cases)
      end
      t.add_row([_("Total")] + footer + ["100%"])
      generator.parse_element(t)
    end

    # Plain-text rendering of the table of counts, with row totals,
    # column totals and the grand total.
    def to_s
      fq = frequencies
      rn = rows_names
      cn = cols_names
      row_sums, col_sums, grand_total = marginal_sums(fq, rn, cn)
      row_labels = rn.collect { |row| @v_rows.labeling(row) }
      col_labels = cn.collect { |col| @v_cols.labeling(col) }
      # The first column is at least 6 characters wide.
      label_width = ([6] + row_labels.collect { |label| label.size }).max
      # Column totals take part in the width: a total can have more
      # digits than every cell and label of its column, and padding by a
      # negative amount would raise ArgumentError.
      col_width = ([0] +
                   col_labels.collect { |label| label.size } +
                   fq.values.collect { |count| count.to_s.size } +
                   col_sums.values.collect { |sum| sum.to_s.size }).max
      separator = "-" * (label_width + 2) + "|" + ("-" * (col_width + 2) + "|") * cn.size + "-" * 7 + "\n"
      out = ""
      out << " " * (label_width + 2) << "|"
      out << col_labels.collect { |label| " " + label.ljust(col_width) + " " }.join("|")
      out << "| Total\n"
      out << separator
      rn.each_with_index do |row, i|
        out << " " << row_labels[i].ljust(label_width) << " | "
        cn.each { |col| out << " " << fq[[row, col]].to_s.ljust(col_width) << "| " }
        out << " " << row_sums[row].to_s << "\n"
      end
      out << separator
      out << " " << "Total".ljust(label_width) << " | "
      cn.each { |col| out << " " << col_sums[col].to_s.ljust(col_width) << "| " }
      out << " " << grand_total.to_s
      out
    end

    private

    # Sums the cell counts of +fq+ per row, per column and overall.
    # Returns [row_sums, col_sums, grand_total].
    def marginal_sums(fq, rn, cn)
      row_sums = {}
      col_sums = {}
      cn.each { |col| col_sums[col] = 0 }
      grand_total = 0
      rn.each do |row|
        row_sums[row] = 0
        cn.each do |col|
          count = fq[[row, col]]
          row_sums[row] += count
          col_sums[col] += count
          grand_total += count
        end
      end
      [row_sums, col_sums, grand_total]
    end

    # Header row shared by the report tables: an empty corner cell, one
    # label per column and the "Total" caption.
    def table_header(cn)
      [""] + cn.collect { |col| @v_cols.labeling(col) } + [_("Total")]
    end

    # Formats +count+ as a percentage of +base+ with two decimals.
    def format_percentage(count, base)
      sprintf("%0.2f%%", count * 100.0 / base)
    end
  end
end