module Eps
  # Linear regression model described purely by a hash of named coefficients.
  #
  # Coefficient names are stored as symbols. The intercept lives under
  # +:_intercept+, numeric features under their own name, and categorical
  # (String-valued) features under the concatenation of feature name and value
  # (for example +:colorred+).
  class BaseRegressor
    attr_reader :coefficients

    # @param coefficients [Hash] coefficient name (String or Symbol) => value;
    #   keys are converted to symbols
    def initialize(coefficients:)
      @coefficients = Hash[coefficients.map { |k, v| [k.to_sym, v] }]
    end

    # Predicts the target for one observation or for a collection of them.
    #
    # @param x [Array, Hash, Object] an Array of rows (each row a Hash, an
    #   Array of values, or a single value), a Daru::DataFrame when Daru is
    #   loaded, or a single row that is not an Array
    # @return [Array<Float>, Float] one prediction per row, or a single Float
    #   when a single non-Array row was given
    # @raise [RuntimeError] "Missing data" when a value is nil, or
    #   "Overlapping coefficients" when flattened feature names collide
    def predict(x)
      singular = !(x.is_a?(Array) || daru?(x))
      x = [x] if singular

      # No rows means no predictions; building a 0x0 matrix below would fail
      # with a dimension mismatch against the coefficient vector.
      return [] if x.is_a?(Array) && x.empty?

      rows, keys = prep_x(x, train: false)
      coef = keys.map do |key|
        # use 0 if coefficient does not exist
        # this can happen for categorical features
        # since only n-1 coefficients are stored
        coefficients[key] || 0
      end

      pred = matrix_arr(Matrix.rows(rows) * Matrix.column_vector(coef))
      singular ? pred[0] : pred
    end

    # ruby

    # Rebuilds a model from the hash produced by #dump.
    #
    # @param data [Hash] hash with a +coefficients+ entry (String or Symbol key)
    # @return [BaseRegressor]
    def self.load(data)
      # The initializer only takes keywords, so the symbolized hash has to be
      # splatted; a positional hash is rejected on Ruby 3.
      BaseRegressor.new(**Hash[data.map { |k, v| [k.to_sym, v] }])
    end

    # @return [Hash] serializable representation, +{coefficients: {...}}+
    def dump
      {coefficients: coefficients}
    end

    # json

    # Builds a model from a JSON document or an already parsed Hash.
    #
    # @param data [String, Hash] JSON text or parsed data with a
    #   "coefficients" entry
    # @return [BaseRegressor]
    def self.load_json(data)
      data = JSON.parse(data) if data.is_a?(String)
      coefficients = data["coefficients"]

      # for R models
      if coefficients["(Intercept)"]
        coefficients = coefficients.dup
        coefficients["_intercept"] = coefficients.delete("(Intercept)")
      end

      BaseRegressor.new(coefficients: coefficients)
    end

    # Serializes #dump as JSON. Optional arguments are accepted and ignored so
    # the method can also be called with a generator state argument.
    #
    # @return [String]
    def to_json(*_args)
      JSON.generate(dump)
    end

    # pmml

    # Builds a model from a PMML document.
    #
    # @param data [String, Object] PMML text, or an already parsed document
    #   that responds to +css+
    # @return [BaseRegressor]
    def self.load_pmml(data)
      data = Nokogiri::XML(data) if data.is_a?(String)

      # TODO more validation
      node = data.css("RegressionTable")

      coefficients = {
        _intercept: node.attribute("intercept").value.to_f
      }
      node.css("NumericPredictor").each do |n|
        coefficients[n.attribute("name").value] = n.attribute("coefficient").value.to_f
      end
      node.css("CategoricalPredictor").each do |n|
        coefficients["#{n.attribute("name").value}#{n.attribute("value").value}"] = n.attribute("coefficient").value.to_f
      end

      BaseRegressor.new(coefficients: coefficients)
    end

    # pfa

    # Builds a model from a PFA document or an already parsed Hash.
    #
    # Feature names come from +input.fields+ when present; otherwise they are
    # generated positionally as x0, x1, ...
    #
    # @param data [String, Hash] PFA JSON text or parsed data
    # @return [BaseRegressor]
    # @raise [RuntimeError] when two coefficients share the same name
    def self.load_pfa(data)
      data = JSON.parse(data) if data.is_a?(String)
      init = data["cells"].first[1]["init"]

      names =
        if data["input"]["fields"]
          data["input"]["fields"].map { |f| f["name"] }
        else
          init["coeff"].map.with_index { |_, i| "x#{i}" }
        end

      coefficients = {
        _intercept: init["const"]
      }
      init["coeff"].each_with_index do |c, i|
        name = names[i]

        # R can export coefficients with same name
        raise "Coefficients with same name" if coefficients.key?(name)

        coefficients[name] = c
      end

      BaseRegressor.new(coefficients: coefficients)
    end

    private

    # True only when Daru is loaded and x is a Daru::DataFrame.
    def daru?(x)
      defined?(Daru) && x.is_a?(Daru::DataFrame)
    end

    # Converts raw rows into a numeric design matrix plus its column names.
    #
    # String values are treated as categorical: each distinct (feature, value)
    # pair gets its own indicator column holding 1. Every row is prefixed with
    # a constant 1 for the intercept.
    #
    # @param x [Array, Object] rows (Hash, Array or scalar each) or a Daru
    #   data frame
    # @param train [Boolean] when true, the first seen level of every
    #   categorical feature is dropped from the columns
    # @return [Array] two elements: the rows as arrays of numbers, and the
    #   column names as symbols starting with +:_intercept+
    # @raise [RuntimeError] "Missing data" or "Overlapping coefficients"
    def prep_x(x, train: true)
      if daru?(x)
        x = x.to_a[0]
      else
        x = x.map do |xi|
          case xi
          when Hash
            xi
          when Array
            Hash[xi.map.with_index { |v, i| [:"x#{i}", v] }]
          else
            {x0: xi}
          end
        end
      end

      # if !train && x.any?
      #   # check first row against coefficients
      #   ckeys = coefficients.keys.map(&:to_s)
      #   bad_keys = x[0].keys.map(&:to_s).reject { |k| ckeys.any? { |c| c.start_with?(k) } }
      #   raise "Unknown keys: #{bad_keys.join(", ")}" if bad_keys.any?
      # end

      cache = {}     # column key => column index, in order of first appearance
      first_key = {} # categorical feature => first level seen for it
      i = 0
      rows = []
      x.each do |xi|
        row = {}
        xi.each do |k, v|
          categorical = v.is_a?(String)
          key = categorical ? [k.to_sym, v] : k.to_sym
          value = categorical ? 1 : v

          raise "Missing data" if value.nil?

          unless cache[key]
            cache[key] = i
            first_key[k] ||= key if categorical
            i += 1
          end

          row[key] = value
        end
        rows << row
      end

      if train
        # remove one degree of freedom
        first_key.values.each do |v|
          num = cache.delete(v)
          # close the gap left by the removed column
          cache.each do |k, v2|
            cache[k] -= 1 if v2 > num
          end
        end
      end

      matrix = rows.map do |row|
        values = [0] * cache.size
        row.each do |k, v|
          index = cache[k]
          values[index] = v if index
        end
        [1] + values
      end

      # flatten keys
      c = [:_intercept] + cache.sort_by { |_, v| v }.map { |k, _| (k.is_a?(Array) ? k.join("") : k).to_sym }

      if c.size != c.uniq.size
        raise "Overlapping coefficients"
      end

      [matrix, c]
    end

    # Returns the first column of a matrix as an Array of Floats.
    def matrix_arr(matrix)
      matrix.to_a.map { |xi| xi[0].to_f }
    end
  end
end