Sha256: 40c05730485a0289057c4cb2b8f2e6613e14d4076c286054b9e63d7949f07fa7

Contents?: true

Size: 1 KB

Versions: 1

Compression:

Stored size: 1 KB

Contents

module Eps
  # Encodes labels as consecutive integer codes and decodes them back.
  #
  # Labels are keyed by their string form (+to_s+), so +:a+ and +"a"+ share
  # one code. Codes are assigned in sorted order of those strings, starting
  # at 0. +nil+ is never a label: it is passed through untouched by both
  # #transform and #inverse_transform.
  class LabelEncoder
    # @return [Hash{String => Integer}] mapping of label string to its code
    attr_reader :labels

    def initialize
      @labels = {}
    end

    # Learns the label => code mapping from +y+, replacing any previous one.
    #
    # @param y [Array] raw labels; +nil+ entries are ignored
    # @return [Hash{String => Integer}] the newly learned mapping
    def fit(y)
      @labels = y.compact.map(&:to_s).uniq.sort.each_with_index.to_h
    end

    # Fits on +y+ and returns its encoded form.
    #
    # @param y [Array] raw labels
    # @return [Array<Integer, nil>] codes for +y+
    def fit_transform(y)
      fit(y)
      transform(y)
    end

    # Encodes each element of +y+ using the fitted mapping.
    #
    # @param y [Array] raw labels
    # @return [Array<Integer, nil>] codes; +nil+ stays +nil+
    def transform(y)
      # use an additional label for unseen values
      # this is only used during training for the LightGBM eval_set
      # LightGBM ignores them (only uses seen categories for predictions)
      # https://github.com/microsoft/LightGBM/issues/1936
      # the evaluator also ignores them (to be consistent with LightGBM)
      # but doesn't use this code
      unseen = @labels.size
      y.map { |yi| yi.nil? ? nil : @labels.fetch(yi.to_s, unseen) }
    end

    # Decodes each code in +y+ back to its label string.
    #
    # Codes are coerced with +to_i+, so +1+, +1.0+ and +"1"+ are equivalent.
    #
    # @param y [Array] codes
    # @return [Array<String, nil>] labels; +nil+ for a +nil+ code or a code
    #   with no fitted label
    def inverse_transform(y)
      inverse = @labels.invert
      # nil.to_i is 0, which would silently decode a missing value as the
      # first label; keep nil as nil so transform/inverse_transform round-trip
      y.map { |yi| inverse[yi.to_i] unless yi.nil? }
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
eps-0.5.0 lib/eps/label_encoder.rb