module Statsample
  class DominanceAnalysis
    # == Goal
    # Generates Bootstrap sample to identify the replicability of a Dominance Analysis. See Azen & Budescu (2003) for more information.
    #
    # == Usage
    #
    #   require 'statsample'
    #   a=100.times.collect {rand}.to_scale
    #   b=100.times.collect {rand}.to_scale
    #   c=100.times.collect {rand}.to_scale
    #   d=100.times.collect {rand}.to_scale
    #   ds={'a'=>a,'b'=>b,'c'=>c,'d'=>d}.to_dataset
    #   ds['y']=ds.collect{|row| row['a']*5+row['b']*2+row['c']*2+row['d']*2+10*rand()}
    #   dab=Statsample::DominanceAnalysis::Bootstrap.new(ds, 'y', :debug=>true)
    #   dab.bootstrap(100,nil)
    #   puts dab.summary
    # Output
    #   Sample size: 100
    #   t: 1.98421693632958
    #
    #   Linear Regression Engine: Statsample::Regression::Multiple::MatrixEngine
    #   Table: Bootstrap report
    #   --------------------------------------------------------------------------------------------
    #   | pairs | sD | Dij | SE(Dij) | Pij | Pji | Pno | Reproducibility |
    #   --------------------------------------------------------------------------------------------
    #   | Complete dominance |
    #   --------------------------------------------------------------------------------------------
    #   | a - b | 1.0 | 0.6150 | 0.454 | 0.550 | 0.320 | 0.130 | 0.550 |
    #   | a - c | 1.0 | 0.9550 | 0.175 | 0.930 | 0.020 | 0.050 | 0.930 |
    #   | a - d | 1.0 | 0.9750 | 0.131 | 0.960 | 0.010 | 0.030 | 0.960 |
    #   | b - c | 1.0 | 0.8800 | 0.276 | 0.820 | 0.060 | 0.120 | 0.820 |
    #   | b - d | 1.0 | 0.9250 | 0.193 | 0.860 | 0.010 | 0.130 | 0.860 |
    #   | c - d | 0.5 | 0.5950 | 0.346 | 0.350 | 0.160 | 0.490 | 0.490 |
    #   --------------------------------------------------------------------------------------------
    #   | Conditional dominance |
    #   --------------------------------------------------------------------------------------------
    #   | a - b | 1.0 | 0.6300 | 0.458 | 0.580 | 0.320 | 0.100 | 0.580 |
    #   | a - c | 1.0 | 0.9700 | 0.156 | 0.960 | 0.020 | 0.020 | 0.960 |
    #   | a - d | 1.0 | 0.9800 | 0.121 | 0.970 | 0.010 | 0.020 | 0.970 |
    #   | b - c | 1.0 | 0.8850 | 0.283 | 0.840 | 0.070 | 0.090 | 0.840 |
    #   | b - d | 1.0 | 0.9500 | 0.181 | 0.920 | 0.020 | 0.060 | 0.920 |
    #   | c - d | 0.5 | 0.5800 | 0.360 | 0.350 | 0.190 | 0.460 | 0.460 |
    #   --------------------------------------------------------------------------------------------
    #   | General Dominance |
    #   --------------------------------------------------------------------------------------------
    #   | a - b | 1.0 | 0.6500 | 0.479 | 0.650 | 0.350 | 0.000 | 0.650 |
    #   | a - c | 1.0 | 0.9800 | 0.141 | 0.980 | 0.020 | 0.000 | 0.980 |
    #   | a - d | 1.0 | 0.9900 | 0.100 | 0.990 | 0.010 | 0.000 | 0.990 |
    #   | b - c | 1.0 | 0.9000 | 0.302 | 0.900 | 0.100 | 0.000 | 0.900 |
    #   | b - d | 1.0 | 0.9700 | 0.171 | 0.970 | 0.030 | 0.000 | 0.970 |
    #   | c - d | 1.0 | 0.5600 | 0.499 | 0.560 | 0.440 | 0.000 | 0.560 |
    #   --------------------------------------------------------------------------------------------
    #
    #   Table: General averages
    #   ---------------------------------------
    #   | var | mean | se | p.5 | p.95 |
    #   ---------------------------------------
    #   | a | 0.133 | 0.049 | 0.062 | 0.218 |
    #   | b | 0.106 | 0.048 | 0.029 | 0.199 |
    #   | c | 0.035 | 0.032 | 0.002 | 0.106 |
    #   | d | 0.023 | 0.019 | 0.002 | 0.062 |
    #   ---------------------------------------
    #
    # == References:
    #
    # * Azen, R. & Budescu, D.V. (2003). The dominance analysis approach for comparing predictors in multiple regression. Psychological Methods, 8(2), 129-148.
    class Bootstrap
      include GetText
      include Writable
      bindtextdomain("statsample")
      # Total Dominance results
      attr_reader :samples_td
      # Conditional Dominance results
      attr_reader :samples_cd
      # General Dominance results
      attr_reader :samples_gd
      # General average results
      attr_reader :samples_ga
      # Name of fields
      attr_reader :fields
      # Regression class used for analysis
      attr_accessor :regression_class
      # Dataset
      attr_accessor :ds
      # Name of analysis
      attr_accessor :name
      # Alpha level of confidence. Default: ALPHA
      attr_accessor :alpha
      # Debug?
attr_accessor :debug

      # Default level of confidence for t calculation
      ALPHA=0.95

      # Create a new Dominance Analysis Bootstrap Object
      #
      # * ds: A Dataset object
      # * y_var: Name of dependent variable. When an Array is given, every
      #   name in it is removed from the predictor list and
      #   Regression::Multiple::MultipleDependent is used as default engine;
      #   otherwise Regression::Multiple::MatrixEngine is the default.
      # * opts: Any other attribute of the class. A key is applied only when
      #   the object has a public writer for it (<tt>key=</tt>); any other
      #   key is ignored.
      def initialize(ds,y_var, opts=Hash.new)
        @ds=ds
        @y_var=y_var
        @n=ds.cases
        @n_samples=0
        @alpha=ALPHA
        @debug=false
        if y_var.is_a? Array
          @fields=ds.fields-y_var
          @regression_class=Regression::Multiple::MultipleDependent
        else
          @fields=ds.fields-[y_var]
          @regression_class=Regression::Multiple::MatrixEngine
        end
        # One empty result list per predictor for the general averages.
        @samples_ga=@fields.inject({}){|a,v| a[v]=[];a}
        @name=_("Bootstrap dominance Analysis: %s over %s") % [ ds.fields.join(",") , @y_var]
        # The writer is what gets called, so the writer is what must exist:
        # checking the reader made read-only attributes (e.g. :fields) blow up
        # with NoMethodError.
        opts.each do |key,value|
          writer="#{key}="
          send(writer,value) if respond_to?(writer)
        end
        create_samples_pairs
      end

      # lr_class deprecated
      alias_method :lr_class, :regression_class
      # Writer counterpart of the deprecated name, so that passing :lr_class
      # as an option keeps selecting the regression engine.
      alias_method :lr_class=, :regression_class=

      # DominanceAnalysis built from the dataset, dependent variable and
      # regression class held by this object. Created on first call and
      # reused afterwards.
      def da
        @da ||= DominanceAnalysis.new(@ds,@y_var, :regression_class => @regression_class)
      end

      # Creates n re-samples from original dataset and store result of
      # each sample on @samples_td, @samples_cd, @samples_gd, @samples_ga
      #
      # * number_samples: Number of new samples to add
      # * n: size of each new sample.
If nil, equal to original sample size def bootstrap(number_samples,n=nil) number_samples.times{ |t| @n_samples+=1 puts _("Bootstrap %d of %d") % [t+1, number_samples] if @debug ds_boot=@ds.bootstrap(n) da_1=DominanceAnalysis.new(ds_boot, @y_var, :regression_class => @regression_class) da_1.total_dominance.each{|k,v| @samples_td[k].push(v) } da_1.conditional_dominance.each{|k,v| @samples_cd[k].push(v) } da_1.general_dominance.each{|k,v| @samples_gd[k].push(v) } da_1.general_averages.each{|k,v| @samples_ga[k].push(v) } } end def create_samples_pairs @samples_td={} @samples_cd={} @samples_gd={} @pairs=[] c=Statsample::Combination.new(2,@fields.size) c.each do |data| convert=data.collect {|i| @fields[i] } @pairs.push(convert) [@samples_td, @samples_cd, @samples_gd].each{|s| s[convert]=[] } end end # Summary of analysis def summary rp=ReportBuilder.new().add(self).to_text end def t Distribution::T.p_value(1-((1-@alpha) / 2), @n_samples - 1) end def report_building(generator) # :nodoc: raise "You should bootstrap first" if @n_samples==0 anchor=generator.toc_entry(_("DAB: ")+@name) generator.html "