lib/statsample/regression/multiple/baseengine.rb in statsample-0.9.0 vs lib/statsample/regression/multiple/baseengine.rb in statsample-0.10.0
- old
+ new
@@ -1,36 +1,45 @@
module Statsample
module Regression
module Multiple
# Base class for Multiple Regression Engines
class BaseEngine
-
- include GetText
- bindtextdomain("statsample")
+ include Statsample::Summarizable
# Name of the analysis (used as the report section name)
attr_accessor :name
-
+ # Minimum number of valid cases for pairs of correlation
+ attr_reader :cases
+ # Number of valid cases (listwise); not assigned in this base class,
+ # presumably set by the concrete engines -- TODO confirm
+ attr_reader :valid_cases
+ # Number of total cases (dataset.cases)
+ attr_reader :total_cases
# Class-level flag telling callers whether the engine is univariate.
# The base engine always answers true.
def self.univariate?
  true
end
# Stores the dataset and the dependent variable name, derives the default
# case counts and the report name, then applies the entries of +opts+
# that the engine responds to.
#
# ds::    dataset; +fields+ and +cases+ are read from it
# y_var:: name of the dependent variable
# opts::  hash of attribute name => value, sent to the matching writers
def initialize(ds, y_var, opts = Hash.new)
@ds=ds
# One field is discounted from the predictor count; this assumes the
# dependent variable is among ds.fields -- TODO confirm.
+ @predictors_n=@ds.fields.size-1
+ @total_cases=@ds.cases
@cases=@ds.cases
@y_var=y_var
@r2=nil
@name=_("Multiple Regression: %s over %s") % [ ds.fields.join(",") , @y_var]
# NOTE(review): the guard checks the reader (k) while the call uses the
# writer (k=). A key that only has a reader (e.g. :cases) would raise
# NoMethodError -- confirm whether respond_to?("#{k}=") was intended.
opts.each{|k,v|
self.send("#{k}=",v) if self.respond_to? k
}
end
# Calculate F Test
# The test object is built once from ssr, sse, df_r and df_e and memoized
# in an instance variable. The new version renames the method from f_test
# to anova and builds a Statsample::Anova::OneWay instead of a
# Statsample::Test::F.
- def f_test
- @f_test||=Statsample::Test::F.new(ssr, sse, df_r, df_e, :name_numerator=>_("Regression"), :name_denominator=>_("Error"), :name=>"ANOVA")
+ def anova
+ @anova||=Statsample::Anova::OneWay.new(:ss_num=>ssr, :ss_den=>sse, :df_num=>df_r, :df_den=>df_e, :name_numerator=>_("Regression"), :name_denominator=>_("Error"), :name=>"ANOVA")
end
+ # Standard error of estimate: square root of sse divided by df_e
+ def se_estimate
+ Math::sqrt(sse.quo(df_e))
+ end
# Retrieves a vector with predicted values for y
def predicted
- (0...@ds.cases).collect { |i|
+ @total_cases.times.collect { |i|
invalid=false
vect=@dep_columns.collect {|v| invalid=true if v[i].nil?; v[i]}
if invalid
nil
else
@@ -42,11 +51,11 @@
# Standardized form of the predicted values, obtained by calling
# +standarized+ on the result of +predicted+.
def standarized_predicted
  fitted = predicted
  fitted.standarized
end
# Retrieves a vector with residuals values for y
def residuals
- (0...@ds.cases).collect{|i|
+ (0...@total_cases).collect{|i|
invalid=false
vect=@dep_columns.collect{|v| invalid=true if v[i].nil?; v[i]}
if invalid or @ds[@y_var][i].nil?
nil
else
@@ -60,10 +69,13 @@
end
# Total sum of squares.
# Not implemented in the base engine: calling it raises a RuntimeError
# asking for an implementation.
def sst
  raise RuntimeError, "You should implement this"
end
# Adjusted R^2: r2 minus (1 - r2) * number of predictors / df_e.
+ def r2_adjusted
+ r2-((1-r2)*@predictors_n).quo(df_e)
+ end
# Regression sum of squares: r2 multiplied by the total sum of squares.
def ssr
  explained_ratio = r2
  explained_ratio * sst
end
# Sum of squares (Error)
@@ -87,23 +99,23 @@
# Mean squared error: sse divided by the error degrees of freedom.
def mse
  error_ss = sse
  error_ss.quo(df_e)
end
# Degrees of freedom for regression (the number of predictors)
def df_r
- @dep_columns.size
+ @predictors_n
end
# Degrees of freedom for error: valid cases minus predictors minus one
def df_e
- @ds_valid.cases-@dep_columns.size-1
+ @valid_cases-@predictors_n-1
end
# F statistic (Fisher) of the ANOVA, read from the memoized test object
def f
- f_test.f
+ anova.f
end
# p-value of the F statistic, read from the memoized test object
def probability
- f_test.probability
+ anova.probability
end
# Tolerance for a given variable
# http://talkstats.com/showthread.php?t=5056
def tolerance(var)
ds=assign_names(@dep_columns)
@@ -128,23 +140,25 @@
out[k]=Math::sqrt(mse/(@ds[k].sum_of_squares * tolerance(k)))
}
out
end
# Standard error of R^2
+ # ???? NOTE(review): formula not yet verified; it uses @cases as n while df_e is built from @valid_cases -- confirm which count is intended
def se_r2
Math::sqrt((4*r2*(1-r2)**2*(df_e)**2).quo((@cases**2-1)*(@cases+3)))
end
# Estimated Variance-Covariance Matrix
# Used for calculation of se of constant
#
# Builds a matrix whose columns are the data of every field of @ds_valid
# except @y_var, preceded by a column of ones, and multiplies the inverse
# of X'X by mse. The returned matrix holds the square root of each
# positive entry and nil for every entry that is not positive.
def estimated_variance_covariance_matrix
# NOTE(review): mse_p is assigned but never read; mse is called again below.
mse_p=mse
columns=[]
- @ds_valid.each_vector{|k,v|
+ @ds_valid.fields.each{|k|
+ v=@ds_valid[k]
columns.push(v.data) unless k==@y_var
}
# Leading column of ones, one per valid case.
- columns.unshift([1.0]*@ds_valid.cases)
+ columns.unshift([1.0]*@valid_cases)
x=Matrix.columns(columns)
matrix=((x.t*x)).inverse * mse
# Element-wise square root; entries that are not > 0 become nil.
matrix.collect {|i| Math::sqrt(i) if i>0 }
end
# T for constant
@@ -153,28 +167,25 @@
end
# Standard error of the constant: the [0, 0] entry of the matrix returned
# by estimated_variance_covariance_matrix.
def constant_se
  vcov = estimated_variance_covariance_matrix
  vcov[0, 0]
end
# Old version only (every line below is removed in the new version):
# rendered the engine as text by adding it to a ReportBuilder.
- def summary
- rp=ReportBuilder.new()
- rp.add(self)
- rp.to_text
- end
def report_building(b)
b.section(:name=>@name) do |g|
c=coeffs
- g.text(_("Engine: %s") % self.class)
- g.text(_("Cases(listwise)=%d(%d)") % [@ds.cases, @ds_valid.cases])
- g.text("R=#{sprintf('%0.3f',r)}")
- g.text("R^2=#{sprintf('%0.3f',r2)}")
+ g.text _("Engine: %s") % self.class
+ g.text(_("Cases(listwise)=%d(%d)") % [@total_cases, @valid_cases])
+ g.text _("R=%0.3f") % r
+ g.text _("R^2=%0.3f") % r2
+ g.text _"R^2 Adj=%0.3f" % r2_adjusted
+ g.text _("Std.Error R=%0.3f") % se_estimate
g.text(_("Equation")+"="+ sprintf('%0.3f',constant) +" + "+ @fields.collect {|k| sprintf('%0.3f%s',c[k],k)}.join(' + ') )
- g.parse_element(f_test)
+ g.parse_element(anova)
sc=standarized_coeffs
cse=coeffs_se
- g.table(:name=>"Beta coefficients", :header=>%w{coeff b beta se t}.collect{|field| _(field)} ) do |t|
+ g.table(:name=>_("Beta coefficients"), :header=>%w{coeff b beta se t}.collect{|field| _(field)} ) do |t|
t.row([_("Constant"), sprintf("%0.3f", constant), "-", sprintf("%0.3f", constant_se), sprintf("%0.3f", constant_t)])
@fields.each do |f|
t.row([f, sprintf("%0.3f", c[f]), sprintf("%0.3f", sc[f]), sprintf("%0.3f", cse[f]), sprintf("%0.3f", c[f].quo(cse[f]))])
end
end
\ No newline at end of file