Sha256: fa0a858a350324ae5a8e043711c281294880ca9f0a26fd9bb3a30720b4f98c47

Contents?: true

Size: 1 KB

Versions: 11

Compression:

Stored size: 1 KB

Contents

module Eps
  # Maps labels to consecutive integer codes and back again.
  #
  # Labels are keyed by their string form (+to_s+), so values such as +:a+
  # and "a" share a single code. +nil+ entries are treated as missing values
  # and are passed through unchanged in both directions.
  class LabelEncoder
    # @return [Hash{String => Integer}] label string => integer code
    #   (empty until {#fit} has been called)
    attr_reader :labels

    def initialize
      @labels = {}
    end

    # Learns the label => code mapping from +y+, replacing any previous one.
    #
    # Distinct non-nil values are converted to strings, sorted, and numbered
    # from 0 in that order.
    #
    # @param y [Array] raw labels; +nil+ entries are ignored
    # @return [Hash{String => Integer}] the newly learned mapping
    def fit(y)
      @labels = y.compact.map(&:to_s).uniq.sort.each_with_index.to_h
    end

    # Fits the encoder on +y+ and returns +y+ encoded with the new mapping.
    #
    # @param y [Array] raw labels
    # @return [Array<Integer, nil>] codes, with +nil+ where +y+ had +nil+
    def fit_transform(y)
      fit(y)
      transform(y)
    end

    # Encodes each element of +y+ using the current mapping.
    #
    # @param y [Enumerable] raw labels
    # @return [Array<Integer, nil>] codes, with +nil+ where +y+ had +nil+
    def transform(y)
      # use an additional label for unseen values
      # this is only used during training for the LightGBM eval_set
      # LightGBM ignores them (only uses seen categories for predictions)
      # https://github.com/microsoft/LightGBM/issues/1936
      # the evaluator also ignores them (to be consistent with LightGBM)
      # but doesn't use this code
      unseen = @labels.size
      y.map do |yi|
        next if yi.nil?

        @labels.fetch(yi.to_s, unseen)
      end
    end

    # Decodes integer codes back to their label strings.
    #
    # Each non-nil element is converted with +to_i+ before the lookup. Codes
    # with no matching label (including the extra "unseen" code produced by
    # {#transform}) decode to +nil+.
    #
    # @param y [Enumerable] codes
    # @return [Array<String, nil>] label strings, with +nil+ where +y+ had
    #   +nil+ or an unknown code
    def inverse_transform(y)
      inverse = @labels.invert
      y.map do |yi|
        # nil.to_i is 0, so without this guard a missing value would be
        # decoded as the first label instead of staying nil
        inverse[yi.to_i] unless yi.nil?
      end
    end
  end
end

Version data entries

11 entries across 11 versions & 1 rubygem

Version Path
eps-0.4.1 lib/eps/label_encoder.rb
eps-0.4.0 lib/eps/label_encoder.rb
eps-0.3.9 lib/eps/label_encoder.rb
eps-0.3.8 lib/eps/label_encoder.rb
eps-0.3.7 lib/eps/label_encoder.rb
eps-0.3.6 lib/eps/label_encoder.rb
eps-0.3.5 lib/eps/label_encoder.rb
eps-0.3.4 lib/eps/label_encoder.rb
eps-0.3.3 lib/eps/label_encoder.rb
eps-0.3.2 lib/eps/label_encoder.rb
eps-0.3.1 lib/eps/label_encoder.rb