Sha256: d4a3af17005634e76e3788e99e784be37f06b5e966f90734b02d046f209a4ec6

Contents?: true

Size: 1.93 KB

Versions: 1

Compression:

Stored size: 1.93 KB

Contents

# frozen_string_literal: true

module RedAmber
  # Groups the rows of a DataFrame by one or more key columns and exposes
  # aggregation methods (count, sum, mean, ...) over the resulting groups.
  class Group
    # Creates a new Group object.
    #
    # @param dataframe [DataFrame] dataframe to be grouped.
    # @param group_keys [Array] keys for grouping; nested arrays are flattened.
    # @raise [GroupArgumentError] if no key is given, or if any key is not
    #   one of the dataframe's keys.
    def initialize(dataframe, *group_keys)
      @dataframe = dataframe
      @table = @dataframe.table
      @group_keys = group_keys.flatten

      raise GroupArgumentError, 'group_keys is empty.' if @group_keys.empty?

      d = @group_keys - @dataframe.keys
      raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless d.empty?

      @group = @table.group(*@group_keys)
    end

    # Defines one public aggregation method per name below. Each method takes
    # optional summary keys and forwards them, with its own name, to #by.
    functions = %i[count sum product mean min max stddev variance]
    functions.each do |function|
      define_method(function) do |*summary_keys|
        by(function, summary_keys)
      end
    end

    # Returns a string holding the class name, the object id in hexadecimal,
    # and a Hash that maps each group key to the tally of its vector.
    #
    # @return [String]
    def inspect
      tallies = @dataframe.pick(@group_keys).vectors.each_with_object({}) do |v, h|
        h[v.key] = v.tally
      end
      "#<#{self.class}:#{format('0x%016x', object_id)}\n#{tallies}>"
    end

    # Evaluates the block in the context of this Group (so aggregation
    # methods such as +sum+ can be called without a receiver) and returns the
    # aggregated result.
    #
    # @yield block returning a DataFrame or an Array of DataFrames.
    # @return [DataFrame] the block's DataFrame as is, or, for an Array, the
    #   first DataFrame with the columns of each following one assigned to it.
    # @raise [GroupArgumentError] if no block is given or the block returns
    #   anything other than a DataFrame or an Array.
    def summarize(&block)
      # Without this guard, instance_eval(&nil) fails with an unrelated
      # "wrong number of arguments" error that hides the real mistake.
      raise GroupArgumentError, 'No block given.' unless block

      agg = instance_eval(&block)
      case agg
      when DataFrame
        agg
      when Array
        agg.reduce { |aggregated, df| aggregated.assign(df.to_h) }
      else
        raise GroupArgumentError, "Unknown argument: #{agg}"
      end
    end

    private

    # Runs the aggregation +func+ on the underlying group object and wraps
    # the result in a DataFrame whose group key columns come first.
    #
    # @param func [Symbol] name of the aggregation method to call on the group.
    # @param summary_keys [Array] keys to aggregate; may be empty, in which
    #   case no keys are passed to the aggregation.
    # @return [DataFrame]
    # @raise [GroupArgumentError] if any summary key is not one of the
    #   dataframe's keys.
    def by(func, summary_keys)
      summary_keys = Array(summary_keys).flatten
      # An empty summary_keys always yields an empty difference, so checking
      # the difference alone covers the "no keys given" case as well.
      d = summary_keys - @dataframe.keys
      raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless d.empty?

      df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
      # Reorder the columns: group keys first, then everything else.
      df = df.pick(@group_keys, df.keys - @group_keys)
      # if counts are the same (and do not include NaN or nil), aggregate count columns.
      if func == :count && df.pick(@group_keys.size..).to_h.values.uniq.size == 1
        # Keep the key columns plus the first count column; the rename block
        # presumably maps that last remaining column to :count.
        df = df.pick(0..@group_keys.size).rename { [keys[-1], :count] }
      end
      df
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
red_amber-0.2.2 lib/red_amber/group.rb