lib/remi/data_frame/daru.rb in remi-0.2.42 vs lib/remi/data_frame/daru.rb in remi-0.3.0

- old
+ new

@@ -11,11 +11,11 @@ end end # Public: Returns the type of DataFrame - def remi_df_type + def df_type :daru end # Public: Saves a Dataframe to a file. def hash_dump(filename) @@ -24,44 +24,8 @@ # Public: Creates a DataFrame by reading the dumped version from a file. def self.from_hash_dump(filename) Marshal.load(File.binread(filename)) end - - # Public: Allows the user to define an arbitrary aggregation function. - # - # by - The name of the DataFrame vector to use to group records. - # func - A lambda function that accepts three arguments - the - # first argument is the DataFrame, the second is the - # key to the current group, and the third is the index - # of the elements belonging to a group. - # - # Example: - # df = Remi::DataFrame::Daru.new( { a: ['a','a','a','b','b'], year: ['2018','2015','2019', '2014', '2013'] }) - # - # mymin = lambda do |vector, df, group_key, indices| - # values = indices.map { |idx| df.row[idx][vector] } - # "Group #{group_key} has a minimum value of #{values.min}" - # end - # - # df.aggregate(by: :a, func: mymin.curry.(:year)) - # - # - # Returns a Daru::Vector. - def aggregate(by:, func:) - grouped = self.group_by(by) - df_indices = self.index.to_a - ::Daru::Vector.new( - grouped.groups.reduce({}) do |h, (key, indices)| - # Daru groups don't use the index of the dataframe when returning groups (WTF?). - # Instead they return the position of the record in the dataframe. Here, we - group_df_indices = indices.map { |v| df_indices[v] } - group_key = key.size == 1 ? key.first : key - h[group_key] = func.(self, group_key, group_df_indices) - h - end - ) - end - end end end