Sha256: 640c115fc491cc50c1afe14841189767365dac621cfacda55a8bafd9e301cae0

Contents?: true

Size: 1.53 KB

Versions: 2

Compression:

Stored size: 1.53 KB

Contents

module Eps
  module Evaluators
    # Evaluates a linear model: every row starts at the intercept and then
    # receives one contribution per configured feature.
    class LinearRegression
      attr_reader :features

      # coefficients:  key/value pairs. An Array key becomes
      #                [first element as String, second element unchanged];
      #                any other key is converted with to_s.
      # features:      enumerated as (name, type) pairs by #predict.
      # text_features: looked up by feature name and handed to TextEncoder.new;
      #                nil is replaced by an empty Hash.
      def initialize(coefficients:, features:, text_features:)
        @coefficients = coefficients.each_with_object({}) do |(key, weight), table|
          normalized = key.is_a?(Array) ? [key[0].to_s, key[1]] : key.to_s
          table[normalized] = weight
        end
        @features = features
        @text_features = text_features || {}
      end

      # Returns an Array with one score per row of x.
      #
      # x must respond to #size and to #columns (indexable by feature name).
      # Raises RuntimeError ("Missing data in <name>") when a feature's column
      # is absent or contains nil.
      def predict(x)
        base = @coefficients["_intercept"] || 0.0
        scores = Array.new(x.size, base)

        @features.each do |name, type|
          column = x.columns[name]
          raise "Missing data in #{name}" unless column && column.none?(&:nil?)

          case type
          when "categorical"
            add_categorical(scores, name, column)
          when "text"
            add_text(scores, name, column)
          else
            add_numeric(scores, name, column)
          end
        end

        scores
      end

      # Returns the coefficients keyed by Symbol. Array keys are joined into a
      # single name (without a separator) before being symbolized.
      def coefficients
        @coefficients.each_with_object({}) do |(key, weight), exported|
          exported[Array(key).join.to_sym] = weight
        end
      end

      private

      # Adds the weight stored under [name, value] for each row; a value with
      # no stored weight contributes 0.0 (nil.to_f).
      def add_categorical(scores, name, column)
        column.each_with_index do |value, row|
          scores[row] += @coefficients[[name, value]].to_f
        end
      end

      # Encodes the column with TextEncoder, then adds weight * count for every
      # word that has a coefficient under this feature name.
      def add_text(scores, name, column)
        counts = TextEncoder.new(@text_features[name]).transform(column)

        # Collect only the [name, word] coefficients, keyed by word.
        weights = @coefficients.each_with_object({}) do |(key, weight), by_word|
          by_word[key.last] = weight if key.is_a?(Array) && key.first == name
        end

        counts.each_with_index do |word_counts, row|
          word_counts.each do |word, count|
            weight = weights[word]
            scores[row] += weight * count if weight
          end
        end
      end

      # Adds coefficient * value for each row; a feature without a stored
      # coefficient uses 0.0 (nil.to_f).
      def add_numeric(scores, name, column)
        slope = @coefficients[name].to_f
        column.each_with_index do |value, row|
          scores[row] += slope * value
        end
      end
    end
  end
end

Version data entries

2 entries across 2 versions & 1 rubygem

Version Path
eps-0.3.2 lib/eps/evaluators/linear_regression.rb
eps-0.3.1 lib/eps/evaluators/linear_regression.rb