module Statsample
module Regression
module Multiple
# Pure Ruby Class for Multiple Regression Analysis.
# Slower than AlglibEngine, but it is pure Ruby and can use a pairwise approach for missing values.
# Coefficient calculation uses the correlation matrix between the vectors.
# If you need a listwise approach for missing values, use AlglibEngine, because it is faster.
# 
# Example:
#
#   @a=[1,3,2,4,3,5,4,6,5,7].to_vector(:scale)
#   @b=[3,3,4,4,5,5,6,6,4,4].to_vector(:scale)
#   @c=[11,22,30,40,50,65,78,79,99,100].to_vector(:scale)
#   @y=[3,4,5,6,7,8,9,10,20,30].to_vector(:scale)
#   ds={'a'=>@a,'b'=>@b,'c'=>@c,'y'=>@y}.to_dataset
#   lr=Statsample::Regression::Multiple::RubyEngine.new(ds,'y')

class RubyEngine < MatrixEngine
  # Builds the engine from the correlation matrix of +ds+.
  #
  # ds    :: dataset holding the dependent and independent fields.
  # y_var :: name of the dependent field inside +ds+.
  # opts  :: extra options forwarded to the MatrixEngine initializer
  #          (assumed to be a Hash, as the default is).
  #
  # The means and standard deviations of every field, plus the value of
  # Statsample::Bivariate.min_n_valid(ds) as :cases, are merged over
  # +opts+, so these computed entries win over caller-supplied entries
  # with the same key.
  # NOTE(review): confirm that overriding caller options is intended.
  def initialize(ds, y_var, opts = {})
    correlations = ds.correlation_matrix
    predictors = ds.fields - [y_var]

    y_mean = ds[y_var].mean
    x_mean = {}
    predictors.each { |name| x_mean[name] = ds[name].mean }
    y_sd = ds[y_var].sd
    x_sd = {}
    predictors.each { |name| x_sd[name] = ds[name].sd }
    n_cases = Statsample::Bivariate.min_n_valid(ds)

    computed = {
      :y_mean => y_mean,
      :x_mean => x_mean,
      :y_sd => y_sd,
      :x_sd => x_sd,
      :cases => n_cases
    }
    super(correlations, y_var, opts.merge(computed))

    @ds = ds
    # @y_var is not assigned in this class; presumably set by the
    # superclass initializer - verify against MatrixEngine.
    @dy = ds[@y_var]
    @ds_valid = ds.dup_only_valid
    @total_cases = @ds.cases
    @valid_cases = @ds_valid.cases
    @ds_indep = ds.dup(ds.fields - [y_var])
    set_dep_columns
  end

  # Rebuilds @dep_columns with the +data_with_nils+ of every vector
  # in the independent-fields dataset.
  def set_dep_columns
    @dep_columns = []
    @ds_indep.each_vector do |_name, vector|
      @dep_columns << vector.data_with_nils
    end
  end

  # For every row of the independent-fields dataset that has exactly one
  # nil value, stores the mean of that field (taken from the original
  # dataset) at the row's position. Rows with zero or several nil values
  # are left as they are. Afterwards the valid data is refreshed and
  # @dep_columns is rebuilt.
  def fix_with_mean
    case_index = 0
    @ds_indep.each do |row|
      missing = []
      row.each { |name, value| missing << name if value.nil? }
      if missing.size == 1
        @ds_indep[missing.first][case_index] = @ds[missing.first].mean
      end
      case_index += 1
    end
    @ds_indep.update_valid_data
    set_dep_columns
  end

  # For every row of the independent-fields dataset that has exactly one
  # nil value, stores the result of a regression of that field on the
  # remaining independent fields, fed with the row's other values.
  # Afterwards the valid data is refreshed and @dep_columns is rebuilt.
  #
  # NOTE(review): MultipleRegression is not defined in this file; confirm
  # the constant still resolves from this namespace.
  def fix_with_regression
    case_index = 0
    @ds_indep.each do |row|
      missing = []
      row.each { |name, value| missing << name if value.nil? }
      if missing.size == 1
        target = missing.first
        # A new model is built for each row that gets filled in.
        model = MultipleRegression.new(@ds_indep, target)
        others = @ds_indep.fields.reject { |name| name == target }
        inputs = others.map { |name| row[name] }
        @ds_indep[target][case_index] = model.process(inputs)
      end
      case_index += 1
    end
    @ds_indep.update_valid_data
    set_dep_columns
  end

  # Standard error for constant.
  # Returns the [0, 0] element of estimated_variance_covariance_matrix.
  # NOTE(review): if that matrix holds variances, a square root may be
  # missing here - confirm against the matrix definition.
  def constant_se
    estimated_variance_covariance_matrix[0, 0]
  end
end
end
end
end