lib/eps/regressor.rb in eps-0.1.0 vs lib/eps/regressor.rb in eps-0.1.1

- old
+ new

@@ -50,12 +50,13 @@ # https://statsmaths.github.io/stat612/lectures/lec13/lecture13.pdf # unfortunately, this method is unstable # haven't found an efficient way to do QR-factorization in Ruby # the extendmatrix gem has householder and givens (givens has bug) # but methods are too slow + xt = x.t begin - @xtxi = (x.t * x).inverse + @xtxi = (xt * x).inverse rescue ExceptionForMatrix::ErrNotRegular constant = {} (1...x.column_count).each do |i| constant[i] = constant?(x.column(i)) end @@ -75,19 +76,22 @@ removed.sort.reverse.each do |i| # @coefficient_names.delete_at(i) vectors.delete_at(i) end x = Matrix.columns(vectors) + xt = x.t # try again begin - @xtxi = (x.t * x).inverse + @xtxi = (xt * x).inverse rescue ExceptionForMatrix::ErrNotRegular raise "Multiple solutions - GSL is needed to select one" end end - v2 = matrix_arr(@xtxi * x.t * y) + # huge performance boost + # by multiplying xt * y first + v2 = matrix_arr(@xtxi * (xt * y)) # add back removed removed.sort.each do |i| v2.insert(i, 0) end @@ -99,23 +103,11 @@ Hash[@coefficient_names.zip(v)] end end def evaluate(data, y = nil) - actual = y - - actual ||= - if daru?(data) - data[@target].to_a - else - data.map { |v| v[@target] } - end - - actual = prep_y(actual) - - estimated = predict(data) - Eps.metrics(actual, estimated) + super(data, y, target: @target) end # https://people.richland.edu/james/ictcm/2004/multiple.html def summary(extended: false) @summary_str ||= begin @@ -150,30 +142,9 @@ private def constant?(arr) arr.all? { |x| x == arr[0] } - end - - # determine if target is a string or symbol - def prep_target(target, data) - if daru?(data) - data.has_vector?(target) ? target : flip_target(target) - else - x = data[0] || {} - x[target] ? target : flip_target(target) - end - end - - def flip_target(target) - target.is_a?(String) ? target.to_sym : target.to_s - end - - def prep_y(y) - y.each do |yi| - raise "Target missing in data" if yi.nil? 
- end - y.map(&:to_f) end # add epsilon for perfect fits # consistent with GSL def t_value