lib/eps/regressor.rb in eps-0.1.0 vs lib/eps/regressor.rb in eps-0.1.1
- old
+ new
@@ -50,12 +50,13 @@
# https://statsmaths.github.io/stat612/lectures/lec13/lecture13.pdf
# unfortunately, this method is unstable
# haven't found an efficient way to do QR-factorization in Ruby
# the extendmatrix gem has householder and givens (givens has bug)
# but methods are too slow
+ xt = x.t
begin
- @xtxi = (x.t * x).inverse
+ @xtxi = (xt * x).inverse
rescue ExceptionForMatrix::ErrNotRegular
constant = {}
(1...x.column_count).each do |i|
constant[i] = constant?(x.column(i))
end
@@ -75,19 +76,22 @@
removed.sort.reverse.each do |i|
# @coefficient_names.delete_at(i)
vectors.delete_at(i)
end
x = Matrix.columns(vectors)
+ xt = x.t
# try again
begin
- @xtxi = (x.t * x).inverse
+ @xtxi = (xt * x).inverse
rescue ExceptionForMatrix::ErrNotRegular
raise "Multiple solutions - GSL is needed to select one"
end
end
- v2 = matrix_arr(@xtxi * x.t * y)
+ # huge performance boost
+ # by multiplying xt * y first
+ v2 = matrix_arr(@xtxi * (xt * y))
# add back removed
removed.sort.each do |i|
v2.insert(i, 0)
end
@@ -99,23 +103,11 @@
Hash[@coefficient_names.zip(v)]
end
end
def evaluate(data, y = nil)
- actual = y
-
- actual ||=
- if daru?(data)
- data[@target].to_a
- else
- data.map { |v| v[@target] }
- end
-
- actual = prep_y(actual)
-
- estimated = predict(data)
- Eps.metrics(actual, estimated)
+ super(data, y, target: @target)
end
# https://people.richland.edu/james/ictcm/2004/multiple.html
def summary(extended: false)
@summary_str ||= begin
@@ -150,30 +142,9 @@
private
def constant?(arr)
arr.all? { |x| x == arr[0] }
- end
-
- # determine if target is a string or symbol
- def prep_target(target, data)
- if daru?(data)
- data.has_vector?(target) ? target : flip_target(target)
- else
- x = data[0] || {}
- x[target] ? target : flip_target(target)
- end
- end
-
- def flip_target(target)
- target.is_a?(String) ? target.to_sym : target.to_s
- end
-
- def prep_y(y)
- y.each do |yi|
- raise "Target missing in data" if yi.nil?
- end
- y.map(&:to_f)
end
# add epsilon for perfect fits
# consistent with GSL
def t_value