group_by.rb in polars-df-0.1.5

- old
+ new

@@ -10,11 +10,52 @@
       self._dataframe_class = dataframe_class
       self.by = by
       self.maintain_order = maintain_order
     end
 
-    # def apply
+    # Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame.
+    #
+    # Implementing logic using a Ruby function is almost always _significantly_
+    # slower and more memory intensive than implementing the same logic using
+    # the native expression API because:
+    #
+    # - The native expression engine runs in Rust; UDFs run in Ruby.
+    # - Use of Ruby UDFs forces the DataFrame to be materialized in memory.
+    # - Polars-native expressions can be parallelised (UDFs cannot).
+    # - Polars-native expressions can be logically optimised (UDFs cannot).
+    #
+    # Wherever possible you should strongly prefer the native expression API
+    # to achieve the best performance.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "id" => [0, 1, 2, 3, 4],
+    #       "color" => ["red", "green", "green", "red", "red"],
+    #       "shape" => ["square", "triangle", "square", "triangle", "square"]
+    #     }
+    #   )
+    #   df.groupby("color").apply { |group_df| group_df.sample(2) }
+    #   # =>
+    #   # shape: (4, 3)
+    #   # ┌─────┬───────┬──────────┐
+    #   # │ id  ┆ color ┆ shape    │
+    #   # │ --- ┆ ---   ┆ ---      │
+    #   # │ i64 ┆ str   ┆ str      │
+    #   # ╞═════╪═══════╪══════════╡
+    #   # │ 1   ┆ green ┆ triangle │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ green ┆ square   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ 4   ┆ red   ┆ square   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ 3   ┆ red   ┆ triangle │
+    #   # └─────┴───────┴──────────┘
+    # def apply(&f)
+    #   _dataframe_class._from_rbdf(_df.groupby_apply(by, f))
     # end
 
     # Use multiple aggregations on columns.
     #
     # This can be combined with the complete lazy API and is considered idiomatic Polars.
@@ -180,11 +221,10 @@
           .collect(no_optimization: true, string_cache: false)
       )
       _dataframe_class._from_rbdf(df._df)
     end
 
-    # def pivot
-    # end
+    # pivot is deprecated
 
     # Aggregate the first values in the group.
     #
     # @return [DataFrame]
     #