Sha256: d4a3af17005634e76e3788e99e784be37f06b5e966f90734b02d046f209a4ec6
Contents?: true
Size: 1.93 KB
Versions: 1
Compression:
Stored size: 1.93 KB
Contents
# frozen_string_literal: true

module RedAmber
  # Groups the rows of a DataFrame by one or more key columns and exposes
  # aggregation methods (count, sum, product, mean, min, max, stddev,
  # variance) that each return a new DataFrame.
  class Group
    # Creates a new Group object.
    #
    # @param dataframe [DataFrame] dataframe to be grouped.
    # @param group_keys [Array<>] keys for grouping. Nested arrays are flattened.
    # @raise [GroupArgumentError] if no group key is given, or if any group key
    #   is not one of `dataframe.keys`.
    def initialize(dataframe, *group_keys)
      @dataframe = dataframe
      @table = @dataframe.table
      @group_keys = group_keys.flatten

      raise GroupArgumentError, 'group_keys is empty.' if @group_keys.empty?

      d = @group_keys - @dataframe.keys
      raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless d.empty?

      @group = @table.group(*@group_keys)
    end

    # Define one public aggregation method per function name. Each accepts
    # zero or more summary keys and delegates to the private #by.
    functions = %i[count sum product mean min max stddev variance]
    functions.each do |function|
      define_method(function) do |*summary_keys|
        by(function, summary_keys)
      end
    end

    # Builds a printable summary: class name, object id, and a tally of the
    # values found in each group key column.
    #
    # @return [String] the inspection string.
    def inspect
      tallys = @dataframe.pick(@group_keys).vectors.each_with_object({}) do |v, h|
        h[v.key] = v.tally
      end
      "#<#{self.class}:#{format('0x%016x', object_id)}\n#{tallys}>"
    end

    # Evaluates the block with this Group as `self` (via instance_eval), so the
    # aggregation methods can be called without an explicit receiver.
    #
    # @yieldreturn [DataFrame, Array<DataFrame>] one aggregated DataFrame, or
    #   several to be merged.
    # @return [DataFrame, nil] the DataFrame returned by the block as is; for an
    #   Array, the first element with every following element's columns assigned
    #   onto it (nil when the Array is empty).
    # @raise [GroupArgumentError] if no block is given, or if the block returns
    #   anything other than a DataFrame or an Array.
    def summarize(&block)
      # Without this guard instance_eval(&nil) raises a generic
      # "wrong number of arguments" ArgumentError that hides the real mistake.
      # NOTE(review): GroupArgumentError is defined elsewhere; presumably it
      # descends from ArgumentError, so existing rescues keep working - confirm.
      raise GroupArgumentError, 'summarize requires a block.' unless block

      agg = instance_eval(&block)
      case agg
      when DataFrame
        agg
      when Array
        agg.reduce { |aggregated, df| aggregated.assign(df.to_h) }
      else
        raise GroupArgumentError, "Unknown argument: #{agg}"
      end
    end

    private

    # Runs aggregation function `func` on the grouped table.
    #
    # @param func [Symbol] name of the aggregation method sent to the group object.
    # @param summary_keys [Array<>] columns to aggregate; nested arrays are
    #   flattened, and an empty list is passed through to the group object as
    #   no arguments.
    # @return [DataFrame] the result, with the group key columns placed first.
    # @raise [GroupArgumentError] if any summary key is not a key of the dataframe.
    def by(func, summary_keys)
      summary_keys = Array(summary_keys).flatten
      d = summary_keys - @dataframe.keys
      # An empty summary_keys always gives an empty d, so one check is enough.
      raise GroupArgumentError, "#{d} is not a key of\n #{@dataframe}." unless d.empty?

      df = RedAmber::DataFrame.new(@group.send(func, *summary_keys))
      # Reorder so that the group key columns come before the aggregated ones.
      df = df.pick(@group_keys, df.keys - @group_keys)
      # if counts are the same (and do not include NaN or nil), aggregate count columns.
      if func == :count && df.pick(@group_keys.size..).to_h.values.uniq.size == 1
        # Keep the group keys plus the first count column, renamed to :count.
        df = df.pick(0..@group_keys.size).rename { [keys[-1], :count] }
      end
      df
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
red_amber-0.2.2 | lib/red_amber/group.rb |