lib/polars/data_frame.rb in polars-df-0.1.4 vs lib/polars/data_frame.rb in polars-df-0.1.5

- old
+ new

@@ -24,18 +24,18 @@
      data[k] = result.rows.map { |r| r[i] }
    end
  end

  if data.nil?
-   self._df = hash_to_rbdf({}, columns: columns)
+   self._df = self.class.hash_to_rbdf({}, columns: columns)
  elsif data.is_a?(Hash)
    data = data.transform_keys { |v| v.is_a?(Symbol) ? v.to_s : v }
-   self._df = hash_to_rbdf(data, columns: columns)
+   self._df = self.class.hash_to_rbdf(data, columns: columns)
  elsif data.is_a?(Array)
-   self._df = sequence_to_rbdf(data, columns: columns, orient: orient)
+   self._df = self.class.sequence_to_rbdf(data, columns: columns, orient: orient)
  elsif data.is_a?(Series)
-   self._df = series_to_rbdf(data, columns: columns)
+   self._df = self.class.series_to_rbdf(data, columns: columns)
  else
    raise ArgumentError, "DataFrame constructor called with unsupported type; got #{data.class.name}"
  end
end
@@ -44,15 +44,20 @@
  df = DataFrame.allocate
  df._df = rb_df
  df
end

- # def self._from_hashes
- # end
+ # @private
+ def self._from_hashes(data, infer_schema_length: 100, schema: nil)
+   rbdf = RbDataFrame.read_hashes(data, infer_schema_length, schema)
+   _from_rbdf(rbdf)
+ end

- # def self._from_hash
- # end
+ # @private
+ def self._from_hash(data, columns: nil)
+   _from_rbdf(hash_to_rbdf(data, columns: columns))
+ end

# def self._from_records
# end

# def self._from_numo
@@ -184,12 +189,18 @@
      low_memory
    )
  )
end

- # def self._read_avro
- # end
+ # @private
+ def self._read_avro(file, columns: nil, n_rows: nil)
+   if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
+     file = Utils.format_path(file)
+   end
+   projection, columns = Utils.handle_projection_columns(columns)
+   _from_rbdf(RbDataFrame.read_avro(file, columns, projection, n_rows))
+ end

# @private
def self._read_ipc(
  file,
  columns: nil,
@@ -484,16 +495,10 @@
end

# def each
# end

- # def _pos_idx
- # end
-
- # def _pos_idxs
- # end
-
# Returns subset of the DataFrame.
#
# @return [Object]
def [](*args)
  if args.size == 2
@@ -552,23 +557,37 @@
      return Utils.wrap_s(_df.column(item))
    end

    # df[idx]
    if item.is_a?(Integer)
-     return slice(_pos_idx(item, dim: 0), 1)
+     return slice(_pos_idx(item, 0), 1)
    end

    # df[..]
    if item.is_a?(Range)
      return Slice.new(self).apply(item)
    end
+
+   if Utils.is_str_sequence(item, allow_str: false)
+     # select multiple columns
+     # df[["foo", "bar"]]
+     return _from_rbdf(_df.select(item))
+   end
  end

  raise ArgumentError, "Cannot get item of type: #{item.class.name}"
end

+ # Set item.
+ #
+ # @return [Object]
# def []=(key, value)
+ #   if key.is_a?(String)
+ #     raise TypeError, "'DataFrame' object does not support 'Series' assignment by index. Use 'DataFrame.with_columns'"
+ #   end
+
+ #   raise Todo
# end

# no to_arrow

# Convert DataFrame to a hash mapping column name to values.
@@ -580,13 +599,29 @@
  else
    get_columns.to_h { |s| [s.name, s.to_a] }
  end
end

- # def to_hashes / to_a
- # end
+ # Convert every row to a hash of column name to value.
+ #
+ # Note that this is slow.
+ #
+ # @return [Array]
+ #
+ # @example
+ #   df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
+ #   df.to_hashes
+ #   # => [{"foo"=>1, "bar"=>4}, {"foo"=>2, "bar"=>5}, {"foo"=>3, "bar"=>6}]
+ def to_hashes
+   rbdf = _df
+   names = columns
+   height.times.map do |i|
+     names.zip(rbdf.row_tuple(i)).to_h
+   end
+ end

# def to_numo
# end

# no to_pandas
@@ -760,13 +795,29 @@
    null_value,
  )
  nil
end

- # def write_avro
- # end
+ # Write to Apache Avro file.
+ #
+ # @param file [String]
+ #   File path to which the file should be written.
+ # @param compression ["uncompressed", "snappy", "deflate"]
+ #   Compression method. Defaults to "uncompressed".
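+ #
+ # @example A minimal sketch (the output path here is hypothetical):
+ #   df = Polars::DataFrame.new({"a" => [1, 2, 3]})
+ #   df.write_avro("/tmp/example.avro")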
+ #
+ # @return [nil]
+ def write_avro(file, compression = "uncompressed")
+   if compression.nil?
+     compression = "uncompressed"
+   end
+   if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
+     file = Utils.format_path(file)
+   end
+   _df.write_avro(file, compression)
+ end
+
# Write to Arrow IPC binary stream or Feather file.
#
# @param file [String]
#   File path to which the file should be written.
# @param compression ["uncompressed", "lz4", "zstd"]
@@ -864,13 +915,89 @@
def estimated_size(unit = "b")
  sz = _df.estimated_size
  Utils.scale_bytes(sz, to: unit)
end

- # def transpose
- # end
+ # Transpose a DataFrame over the diagonal.
+ #
+ # @param include_header [Boolean]
+ #   If set, the column names will be added as first column.
+ # @param header_name [String]
+ #   If `include_header` is set, this determines the name of the column that will
+ #   be inserted.
+ # @param column_names [Array]
+ #   Optional generator/iterator that yields column names. Will be used to
+ #   replace the columns in the DataFrame.
+ #
+ # @return [DataFrame]
+ #
+ # @note
+ #   This is a very expensive operation. Perhaps you can do it differently.
+ #
+ # @example
+ #   df = Polars::DataFrame.new({"a" => [1, 2, 3], "b" => [1, 2, 3]})
+ #   df.transpose(include_header: true)
+ #   # =>
+ #   # shape: (2, 4)
+ #   # ┌────────┬──────────┬──────────┬──────────┐
+ #   # │ column ┆ column_0 ┆ column_1 ┆ column_2 │
+ #   # │ ---    ┆ ---      ┆ ---      ┆ ---      │
+ #   # │ str    ┆ i64      ┆ i64      ┆ i64      │
+ #   # ╞════════╪══════════╪══════════╪══════════╡
+ #   # │ a      ┆ 1        ┆ 2        ┆ 3        │
+ #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ b      ┆ 1        ┆ 2        ┆ 3        │
+ #   # └────────┴──────────┴──────────┴──────────┘
+ #
+ # @example Replace the auto-generated column names with a list
+ #   df.transpose(include_header: false, column_names: ["a", "b", "c"])
+ #   # =>
+ #   # shape: (2, 3)
+ #   # ┌─────┬─────┬─────┐
+ #   # │ a   ┆ b   ┆ c   │
+ #   # │ --- ┆ --- ┆ --- │
+ #   # │ i64 ┆ i64 ┆ i64 │
+ #   # ╞═════╪═════╪═════╡
+ #   # │ 1   ┆ 2   ┆ 3   │
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ 1   ┆ 2   ┆ 3   │
+ #   # └─────┴─────┴─────┘
+ #
+ # @example Include the header as a separate column
+ #   df.transpose(
+ #     include_header: true, header_name: "foo", column_names: ["a", "b", "c"]
+ #   )
+ #   # =>
+ #   # shape: (2, 4)
+ #   # ┌─────┬─────┬─────┬─────┐
+ #   # │ foo ┆ a   ┆ b   ┆ c   │
+ #   # │ --- ┆ --- ┆ --- ┆ --- │
+ #   # │ str ┆ i64 ┆ i64 ┆ i64 │
+ #   # ╞═════╪═════╪═════╪═════╡
+ #   # │ a   ┆ 1   ┆ 2   ┆ 3   │
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ b   ┆ 1   ┆ 2   ┆ 3   │
+ #   # └─────┴─────┴─────┴─────┘
+ def transpose(include_header: false, header_name: "column", column_names: nil)
+   df = _from_rbdf(_df.transpose(include_header, header_name))
+   if !column_names.nil?
+     names = []
+     n = df.width
+     if include_header
+       names << header_name
+       n -= 1
+     end
+     column_names = column_names.each
+     n.times do
+       names << column_names.next
+     end
+     df.columns = names
+   end
+   df
+ end
+
# Reverse the DataFrame.
#
# @return [DataFrame]
#
# @example
@@ -1460,12 +1587,52 @@
    subset = [subset]
  end
  _from_rbdf(_df.drop_nulls(subset))
end

- # def pipe
- # end
+ # Offers a structured way to apply a sequence of user-defined functions (UDFs).
+ #
+ # @param func [Object]
+ #   Callable; will receive the frame as the first parameter,
+ #   followed by any given args/kwargs.
+ # @param args [Object]
+ #   Arguments to pass to the UDF.
+ # @param kwargs [Object]
+ #   Keyword arguments to pass to the UDF.
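+ # @param block [Proc]
+ #   Optional block; it is forwarded to `func` along with the args/kwargs.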
+ #
+ # @return [Object]
+ #
+ # @note
+ #   It is recommended to use LazyFrame when piping operations, in order
+ #   to fully take advantage of query optimization and parallelization.
+ #   See {#lazy}.
+ #
+ # @example
+ #   cast_str_to_int = lambda do |data, col_name:|
+ #     data.with_column(Polars.col(col_name).cast(:i64))
+ #   end
+ #
+ #   df = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => ["10", "20", "30", "40"]})
+ #   df.pipe(cast_str_to_int, col_name: "b")
+ #   # =>
+ #   # shape: (4, 2)
+ #   # ┌─────┬─────┐
+ #   # │ a   ┆ b   │
+ #   # │ --- ┆ --- │
+ #   # │ i64 ┆ i64 │
+ #   # ╞═════╪═════╡
+ #   # │ 1   ┆ 10  │
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ 2   ┆ 20  │
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ 3   ┆ 30  │
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ 4   ┆ 40  │
+ #   # └─────┴─────┘
+ def pipe(func, *args, **kwargs, &block)
+   func.call(self, *args, **kwargs, &block)
+ end

# Add a column at index 0 that counts the rows.
#
# @param name [String]
#   Name of the column to add.
#
@@ -1545,22 +1712,617 @@
    self.class,
    maintain_order: maintain_order
  )
end

- # def groupby_rolling
- # end
+ # Create rolling groups based on a time column.
+ #
+ # Also works for index values of type `:i32` or `:i64`.
+ #
+ # Unlike `groupby_dynamic`, the windows are determined by the individual
+ # values and are not of constant intervals. For constant intervals use
+ # `groupby_dynamic`.
+ #
+ # The `period` and `offset` arguments are created either from a timedelta, or
+ # by using the following string language:
+ #
+ # - 1ns (1 nanosecond)
+ # - 1us (1 microsecond)
+ # - 1ms (1 millisecond)
+ # - 1s (1 second)
+ # - 1m (1 minute)
+ # - 1h (1 hour)
+ # - 1d (1 day)
+ # - 1w (1 week)
+ # - 1mo (1 calendar month)
+ # - 1y (1 calendar year)
+ # - 1i (1 index count)
+ #
+ # Or combine them:
+ # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
+ #
+ # In case of a groupby_rolling on an integer column, the windows are defined by:
+ #
+ # - "1i" # length 1
+ # - "10i" # length 10
+ #
+ # @param index_column [Object]
+ #   Column used to group based on the time window.
+ #   Often of type Date/Datetime.
+ #   This column must be sorted in ascending order. If not, the output will not
+ #   make sense.
+ #
+ #   In case of a rolling groupby on indices, dtype needs to be one of
+ #   `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
+ #   performance matters use an `:i64` column.
+ # @param period [Object]
+ #   Length of the window.
+ # @param offset [Object]
+ #   Offset of the window. Default is -period.
+ # @param closed ["right", "left", "both", "none"]
+ #   Define whether the temporal window interval is closed or not.
+ # @param by [Object]
+ #   Also group by this column/these columns.
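+ #
+ # @example A sketch of rolling groups over a sorted integer index column (result not shown; "2i" is an index-count duration as described above):
+ #   df = Polars::DataFrame.new({"idx" => [1, 2, 3, 4, 5], "a" => [3, 7, 5, 9, 2]})
+ #   df.groupby_rolling(index_column: "idx", period: "2i").agg([Polars.sum("a").alias("sum_a")])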
+ #
+ # @return [RollingGroupBy]
+ #
+ # @example
+ #   dates = [
+ #     "2020-01-01 13:45:48",
+ #     "2020-01-01 16:42:13",
+ #     "2020-01-01 16:45:09",
+ #     "2020-01-02 18:12:48",
+ #     "2020-01-03 19:45:32",
+ #     "2020-01-08 23:16:43"
+ #   ]
+ #   df = Polars::DataFrame.new({"dt" => dates, "a" => [3, 7, 5, 9, 2, 1]}).with_column(
+ #     Polars.col("dt").str.strptime(:datetime)
+ #   )
+ #   df.groupby_rolling(index_column: "dt", period: "2d").agg(
+ #     [
+ #       Polars.sum("a").alias("sum_a"),
+ #       Polars.min("a").alias("min_a"),
+ #       Polars.max("a").alias("max_a")
+ #     ]
+ #   )
+ #   # =>
+ #   # shape: (6, 4)
+ #   # ┌─────────────────────┬───────┬───────┬───────┐
+ #   # │ dt                  ┆ sum_a ┆ min_a ┆ max_a │
+ #   # │ ---                 ┆ ---   ┆ ---   ┆ ---   │
+ #   # │ datetime[μs]        ┆ i64   ┆ i64   ┆ i64   │
+ #   # ╞═════════════════════╪═══════╪═══════╪═══════╡
+ #   # │ 2020-01-01 13:45:48 ┆ 3     ┆ 3     ┆ 3     │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+ #   # │ 2020-01-01 16:42:13 ┆ 10    ┆ 3     ┆ 7     │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+ #   # │ 2020-01-01 16:45:09 ┆ 15    ┆ 3     ┆ 7     │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+ #   # │ 2020-01-02 18:12:48 ┆ 24    ┆ 3     ┆ 9     │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+ #   # │ 2020-01-03 19:45:32 ┆ 11    ┆ 2     ┆ 9     │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+ #   # │ 2020-01-08 23:16:43 ┆ 1     ┆ 1     ┆ 1     │
+ #   # └─────────────────────┴───────┴───────┴───────┘
+ def groupby_rolling(
+   index_column:,
+   period:,
+   offset: nil,
+   closed: "right",
+   by: nil
+ )
+   RollingGroupBy.new(self, index_column, period, offset, closed, by)
+ end

- # def groupby_dynamic
- # end
+ # Group based on a time value (or index value of type `:i32`, `:i64`).
+ #
+ # Time windows are calculated and rows are assigned to windows. Different from a
+ # normal groupby, a row can be a member of multiple groups. The time/index
+ # window could be seen as a rolling window, with a window size determined by
+ # dates/times/values instead of slots in the DataFrame.
+ #
+ # A window is defined by:
+ #
+ # - every: interval of the window
+ # - period: length of the window
+ # - offset: offset of the window
+ #
+ # The `every`, `period` and `offset` arguments are created with
+ # the following string language:
+ #
+ # - 1ns (1 nanosecond)
+ # - 1us (1 microsecond)
+ # - 1ms (1 millisecond)
+ # - 1s (1 second)
+ # - 1m (1 minute)
+ # - 1h (1 hour)
+ # - 1d (1 day)
+ # - 1w (1 week)
+ # - 1mo (1 calendar month)
+ # - 1y (1 calendar year)
+ # - 1i (1 index count)
+ #
+ # Or combine them:
+ # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
+ #
+ # In case of a groupby_dynamic on an integer column, the windows are defined by:
+ #
+ # - "1i" # length 1
+ # - "10i" # length 10
+ #
+ # @param index_column [Object]
+ #   Column used to group based on the time window.
+ #   Often of type Date/Datetime.
+ #   This column must be sorted in ascending order. If not, the output will not
+ #   make sense.
+ #
+ #   In case of a dynamic groupby on indices, dtype needs to be one of
+ #   `:i32`, `:i64`. Note that `:i32` gets temporarily cast to `:i64`, so if
+ #   performance matters use an `:i64` column.
+ # @param every [Object]
+ #   Interval of the window.
+ # @param period [Object]
+ #   Length of the window. If nil, it is equal to `every`.
+ # @param offset [Object]
+ #   Offset of the window. If nil and `period` is nil, it will be equal to
+ #   negative `every`.
+ # @param truncate [Boolean]
+ #   Truncate the time value to the window lower bound.
+ # @param include_boundaries [Boolean]
+ #   Add the lower and upper bound of the window to the "_lower_bound" and
+ #   "_upper_bound" columns.
+ #   This will impact performance because it's harder to parallelize.
+ # @param closed ["right", "left", "both", "none"]
+ #   Define whether the temporal window interval is closed or not.
+ # @param by [Object]
+ #   Also group by this column/these columns.
+ #
+ # @return [DynamicGroupBy]
+ #
+ # @example
+ #   df = Polars::DataFrame.new(
+ #     {
+ #       "time" => Polars.date_range(
+ #         DateTime.new(2021, 12, 16),
+ #         DateTime.new(2021, 12, 16, 3),
+ #         "30m"
+ #       ),
+ #       "n" => 0..6
+ #     }
+ #   )
+ #   # =>
+ #   # shape: (7, 2)
+ #   # ┌─────────────────────┬─────┐
+ #   # │ time                ┆ n   │
+ #   # │ ---                 ┆ --- │
+ #   # │ datetime[μs]        ┆ i64 │
+ #   # ╞═════════════════════╪═════╡
+ #   # │ 2021-12-16 00:00:00 ┆ 0   │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ 2021-12-16 00:30:00 ┆ 1   │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ 2021-12-16 01:00:00 ┆ 2   │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ 2021-12-16 01:30:00 ┆ 3   │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ 2021-12-16 02:00:00 ┆ 4   │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ 2021-12-16 02:30:00 ┆ 5   │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ 2021-12-16 03:00:00 ┆ 6   │
+ #   # └─────────────────────┴─────┘
+ #
+ # @example Group by windows of 1 hour starting at 2021-12-16 00:00:00.
+ #   df.groupby_dynamic("time", every: "1h", closed: "right").agg(
+ #     [
+ #       Polars.col("time").min.alias("time_min"),
+ #       Polars.col("time").max.alias("time_max")
+ #     ]
+ #   )
+ #   # =>
+ #   # shape: (4, 3)
+ #   # ┌─────────────────────┬─────────────────────┬─────────────────────┐
+ #   # │ time                ┆ time_min            ┆ time_max            │
+ #   # │ ---                 ┆ ---                 ┆ ---                 │
+ #   # │ datetime[μs]        ┆ datetime[μs]        ┆ datetime[μs]        │
+ #   # ╞═════════════════════╪═════════════════════╪═════════════════════╡
+ #   # │ 2021-12-15 23:00:00 ┆ 2021-12-16 00:00:00 ┆ 2021-12-16 00:00:00 │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-12-16 00:00:00 ┆ 2021-12-16 00:30:00 ┆ 2021-12-16 01:00:00 │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-12-16 01:00:00 ┆ 2021-12-16 01:30:00 ┆ 2021-12-16 02:00:00 │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-12-16 02:00:00 ┆ 2021-12-16 02:30:00 ┆ 2021-12-16 03:00:00 │
+ #   # └─────────────────────┴─────────────────────┴─────────────────────┘
+ #
+ # @example The window boundaries can also be added to the aggregation result.
+ #   df.groupby_dynamic(
+ #     "time", every: "1h", include_boundaries: true, closed: "right"
+ #   ).agg([Polars.col("time").count.alias("time_count")])
+ #   # =>
+ #   # shape: (4, 4)
+ #   # ┌─────────────────────┬─────────────────────┬─────────────────────┬────────────┐
+ #   # │ _lower_boundary     ┆ _upper_boundary     ┆ time                ┆ time_count │
+ #   # │ ---                 ┆ ---                 ┆ ---                 ┆ ---        │
+ #   # │ datetime[μs]        ┆ datetime[μs]        ┆ datetime[μs]        ┆ u32        │
+ #   # ╞═════════════════════╪═════════════════════╪═════════════════════╪════════════╡
+ #   # │ 2021-12-15 23:00:00 ┆ 2021-12-16 00:00:00 ┆ 2021-12-15 23:00:00 ┆ 1          │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-12-16 00:00:00 ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 00:00:00 ┆ 2          │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-12-16 01:00:00 ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 01:00:00 ┆ 2          │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 2          │
+ #   # └─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
+ #
+ # @example When `closed: "left"`, the right end of the interval is not included.
+ #   df.groupby_dynamic("time", every: "1h", closed: "left").agg(
+ #     [
+ #       Polars.col("time").count.alias("time_count"),
+ #       Polars.col("time").list.alias("time_agg_list")
+ #     ]
+ #   )
+ #   # =>
+ #   # shape: (4, 3)
+ #   # ┌─────────────────────┬────────────┬─────────────────────────────────────┐
+ #   # │ time                ┆ time_count ┆ time_agg_list                       │
+ #   # │ ---                 ┆ ---        ┆ ---                                 │
+ #   # │ datetime[μs]        ┆ u32        ┆ list[datetime[μs]]                  │
+ #   # ╞═════════════════════╪════════════╪═════════════════════════════════════╡
+ #   # │ 2021-12-16 00:00:00 ┆ 2          ┆ [2021-12-16 00:00:00, 2021-12-16... │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-12-16 01:00:00 ┆ 2          ┆ [2021-12-16 01:00:00, 2021-12-16... │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-12-16 02:00:00 ┆ 2          ┆ [2021-12-16 02:00:00, 2021-12-16... │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-12-16 03:00:00 ┆ 1          ┆ [2021-12-16 03:00:00]               │
+ #   # └─────────────────────┴────────────┴─────────────────────────────────────┘
+ #
+ # @example When `closed: "both"`, the time values at the window boundaries belong to 2 groups.
+ #   df.groupby_dynamic("time", every: "1h", closed: "both").agg(
+ #     [Polars.col("time").count.alias("time_count")]
+ #   )
+ #   # =>
+ #   # shape: (5, 2)
+ #   # ┌─────────────────────┬────────────┐
+ #   # │ time                ┆ time_count │
+ #   # │ ---                 ┆ ---        │
+ #   # │ datetime[μs]        ┆ u32        │
+ #   # ╞═════════════════════╪════════════╡
+ #   # │ 2021-12-15 23:00:00 ┆ 1          │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-12-16 00:00:00 ┆ 3          │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-12-16 01:00:00 ┆ 3          │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-12-16 02:00:00 ┆ 3          │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-12-16 03:00:00 ┆ 1          │
+ #   # └─────────────────────┴────────────┘
+ #
+ # @example Dynamic groupbys can also be combined with grouping on normal keys.
+ #   df = Polars::DataFrame.new(
+ #     {
+ #       "time" => Polars.date_range(
+ #         DateTime.new(2021, 12, 16),
+ #         DateTime.new(2021, 12, 16, 3),
+ #         "30m"
+ #       ),
+ #       "groups" => ["a", "a", "a", "b", "b", "a", "a"]
+ #     }
+ #   )
+ #   df.groupby_dynamic(
+ #     "time",
+ #     every: "1h",
+ #     closed: "both",
+ #     by: "groups",
+ #     include_boundaries: true
+ #   ).agg([Polars.col("time").count.alias("time_count")])
+ #   # =>
+ #   # shape: (7, 5)
+ #   # ┌────────┬─────────────────────┬─────────────────────┬─────────────────────┬────────────┐
+ #   # │ groups ┆ _lower_boundary     ┆ _upper_boundary     ┆ time                ┆ time_count │
+ #   # │ ---    ┆ ---                 ┆ ---                 ┆ ---                 ┆ ---        │
+ #   # │ str    ┆ datetime[μs]        ┆ datetime[μs]        ┆ datetime[μs]        ┆ u32        │
+ #   # ╞════════╪═════════════════════╪═════════════════════╪═════════════════════╪════════════╡
+ #   # │ a      ┆ 2021-12-15 23:00:00 ┆ 2021-12-16 00:00:00 ┆ 2021-12-15 23:00:00 ┆ 1          │
+ #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ a      ┆ 2021-12-16 00:00:00 ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 00:00:00 ┆ 3          │
+ #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ a      ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 01:00:00 ┆ 1          │
+ #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ a      ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 2          │
+ #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ a      ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 04:00:00 ┆ 2021-12-16 03:00:00 ┆ 1          │
+ #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ b      ┆ 2021-12-16 01:00:00 ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 01:00:00 ┆ 2          │
+ #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ b      ┆ 2021-12-16 02:00:00 ┆ 2021-12-16 03:00:00 ┆ 2021-12-16 02:00:00 ┆ 1          │
+ #   # └────────┴─────────────────────┴─────────────────────┴─────────────────────┴────────────┘
+ #
+ # @example Dynamic groupby on an index column.
+ #   df = Polars::DataFrame.new(
+ #     {
+ #       "idx" => Polars.arange(0, 6, eager: true),
+ #       "A" => ["A", "A", "B", "B", "B", "C"]
+ #     }
+ #   )
+ #   df.groupby_dynamic(
+ #     "idx",
+ #     every: "2i",
+ #     period: "3i",
+ #     include_boundaries: true,
+ #     closed: "right"
+ #   ).agg(Polars.col("A").list.alias("A_agg_list"))
+ #   # =>
+ #   # shape: (3, 4)
+ #   # ┌─────────────────┬─────────────────┬─────┬─────────────────┐
+ #   # │ _lower_boundary ┆ _upper_boundary ┆ idx ┆ A_agg_list      │
+ #   # │ ---             ┆ ---             ┆ --- ┆ ---             │
+ #   # │ i64             ┆ i64             ┆ i64 ┆ list[str]       │
+ #   # ╞═════════════════╪═════════════════╪═════╪═════════════════╡
+ #   # │ 0               ┆ 3               ┆ 0   ┆ ["A", "B", "B"] │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 2               ┆ 5               ┆ 2   ┆ ["B", "B", "C"] │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 4               ┆ 7               ┆ 4   ┆ ["C"]           │
+ #   # └─────────────────┴─────────────────┴─────┴─────────────────┘
+ def groupby_dynamic(
+   index_column,
+   every:,
+   period: nil,
+   offset: nil,
+   truncate: true,
+   include_boundaries: false,
+   closed: "left",
+   by: nil
+ )
+   DynamicGroupBy.new(
+     self,
+     index_column,
+     every,
+     period,
+     offset,
+     truncate,
+     include_boundaries,
+     closed,
+     by
+   )
+ end

- # def upsample
- # end
+ # Upsample a DataFrame at a regular frequency.
+ #
+ # @param time_column [Object]
+ #   The time column will be used to determine a date_range.
+ #   Note that this column has to be sorted for the output to make sense.
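+ #   Rows inserted by the upsample contain nulls in the remaining columns;
+ #   fill them afterwards (the example below uses `forward_fill`).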
+ # @param every [String]
+ #   Interval will start 'every' duration.
+ # @param offset [String]
+ #   Change the start of the date_range by this offset.
+ # @param by [Object]
+ #   First group by these columns and then upsample for every group.
+ # @param maintain_order [Boolean]
+ #   Keep the ordering predictable. This is slower.
+ #
+ # The `every` and `offset` arguments are created with
+ # the following string language:
+ #
+ # - 1ns (1 nanosecond)
+ # - 1us (1 microsecond)
+ # - 1ms (1 millisecond)
+ # - 1s (1 second)
+ # - 1m (1 minute)
+ # - 1h (1 hour)
+ # - 1d (1 day)
+ # - 1w (1 week)
+ # - 1mo (1 calendar month)
+ # - 1y (1 calendar year)
+ # - 1i (1 index count)
+ #
+ # Or combine them:
+ # "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
+ #
+ # @return [DataFrame]
+ #
+ # @example Upsample a DataFrame by a certain interval.
+ #   df = Polars::DataFrame.new(
+ #     {
+ #       "time" => [
+ #         DateTime.new(2021, 2, 1),
+ #         DateTime.new(2021, 4, 1),
+ #         DateTime.new(2021, 5, 1),
+ #         DateTime.new(2021, 6, 1)
+ #       ],
+ #       "groups" => ["A", "B", "A", "B"],
+ #       "values" => [0, 1, 2, 3]
+ #     }
+ #   )
+ #   df.upsample(
+ #     time_column: "time", every: "1mo", by: "groups", maintain_order: true
+ #   ).select(Polars.all.forward_fill)
+ #   # =>
+ #   # shape: (7, 3)
+ #   # ┌─────────────────────┬────────┬────────┐
+ #   # │ time                ┆ groups ┆ values │
+ #   # │ ---                 ┆ ---    ┆ ---    │
+ #   # │ datetime[ns]        ┆ str    ┆ i64    │
+ #   # ╞═════════════════════╪════════╪════════╡
+ #   # │ 2021-02-01 00:00:00 ┆ A      ┆ 0      │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-03-01 00:00:00 ┆ A      ┆ 0      │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-04-01 00:00:00 ┆ A      ┆ 0      │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-05-01 00:00:00 ┆ A      ┆ 2      │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-04-01 00:00:00 ┆ B      ┆ 1      │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-05-01 00:00:00 ┆ B      ┆ 1      │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ #   # │ 2021-06-01 00:00:00 ┆ B      ┆ 3      │
+ #   # └─────────────────────┴────────┴────────┘
+ def upsample(
+   time_column:,
+   every:,
+   offset: nil,
+   by: nil,
+   maintain_order: false
+ )
+   if by.nil?
+     by = []
+   end
+   if by.is_a?(String)
+     by = [by]
+   end
+   if offset.nil?
+     offset = "0ns"
+   end

- # def join_asof
- # end
+   every = Utils._timedelta_to_pl_duration(every)
+   offset = Utils._timedelta_to_pl_duration(offset)
+
+   _from_rbdf(
+     _df.upsample(by, time_column, every, offset, maintain_order)
+   )
+ end
+
+ # Perform an asof join.
+ #
+ # This is similar to a left-join except that we match on nearest key rather than
+ # equal keys.
+ #
+ # Both DataFrames must be sorted by the asof_join key.
+ #
+ # For each row in the left DataFrame:
+ #
+ # - A "backward" search selects the last row in the right DataFrame whose 'on' key is less than or equal to the left's key.
+ # - A "forward" search selects the first row in the right DataFrame whose 'on' key is greater than or equal to the left's key.
+ #
+ # The default is "backward".
+ #
+ # @param other [DataFrame]
+ #   DataFrame to join with.
+ # @param left_on [String]
+ #   Join column of the left DataFrame.
+ # @param right_on [String]
+ #   Join column of the right DataFrame.
+ # @param on [String]
+ #   Join column of both DataFrames. If set, `left_on` and `right_on` should be nil.
+ # @param by [Object]
+ #   Join on these columns before doing the asof join.
+ # @param by_left [Object]
+ #   Join on these columns before doing the asof join.
+ # @param by_right [Object]
+ #   Join on these columns before doing the asof join.
+ # @param strategy ["backward", "forward"]
+ #   Join strategy.
+ # @param suffix [String]
+ #   Suffix to append to columns with a duplicate name.
+ # @param tolerance [Object]
+ #   Numeric tolerance. By setting this the join will only be done if the near
+ #   keys are within this distance. If an asof join is done on columns of dtype
+ #   "Date", "Datetime", "Duration" or "Time" you can use the following string
+ #   language:
+ #
+ #   - 1ns (1 nanosecond)
+ #   - 1us (1 microsecond)
+ #   - 1ms (1 millisecond)
+ #   - 1s (1 second)
+ #   - 1m (1 minute)
+ #   - 1h (1 hour)
+ #   - 1d (1 day)
+ #   - 1w (1 week)
+ #   - 1mo (1 calendar month)
+ #   - 1y (1 calendar year)
+ #   - 1i (1 index count)
+ #
+ #   Or combine them:
+ #   "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds
+ #
+ # @param allow_parallel [Boolean]
+ #   Allow the physical plan to optionally evaluate the computation of both
+ #   DataFrames up to the join in parallel.
+ # @param force_parallel [Boolean]
+ #   Force the physical plan to evaluate the computation of both DataFrames up to
+ #   the join in parallel.
+ #
+ # @return [DataFrame]
+ #
+ # @example
+ #   gdp = Polars::DataFrame.new(
+ #     {
+ #       "date" => [
+ #         DateTime.new(2016, 1, 1),
+ #         DateTime.new(2017, 1, 1),
+ #         DateTime.new(2018, 1, 1),
+ #         DateTime.new(2019, 1, 1),
+ #       ], # note record date: Jan 1st (sorted!)
+ #       "gdp" => [4164, 4411, 4566, 4696]
+ #     }
+ #   )
+ #   population = Polars::DataFrame.new(
+ #     {
+ #       "date" => [
+ #         DateTime.new(2016, 5, 12),
+ #         DateTime.new(2017, 5, 12),
+ #         DateTime.new(2018, 5, 12),
+ #         DateTime.new(2019, 5, 12),
+ #       ], # note record date: May 12th (sorted!)
+ #       "population" => [82.19, 82.66, 83.12, 83.52]
+ #     }
+ #   )
+ #   population.join_asof(
+ #     gdp, left_on: "date", right_on: "date", strategy: "backward"
+ #   )
+ #   # =>
+ #   # shape: (4, 3)
+ #   # ┌─────────────────────┬────────────┬──────┐
+ #   # │ date                ┆ population ┆ gdp  │
+ #   # │ ---                 ┆ ---        ┆ ---  │
+ #   # │ datetime[ns]        ┆ f64        ┆ i64  │
+ #   # ╞═════════════════════╪════════════╪══════╡
+ #   # │ 2016-05-12 00:00:00 ┆ 82.19      ┆ 4164 │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+ #   # │ 2017-05-12 00:00:00 ┆ 82.66      ┆ 4411 │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+ #   # │ 2018-05-12 00:00:00 ┆ 83.12      ┆ 4566 │
+ #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+ #   # │ 2019-05-12 00:00:00 ┆ 83.52      ┆ 4696 │
+ #   # └─────────────────────┴────────────┴──────┘
+ def join_asof(
+   other,
+   left_on: nil,
+   right_on: nil,
+   on: nil,
+   by_left: nil,
+   by_right: nil,
+   by: nil,
+   strategy: "backward",
+   suffix: "_right",
+   tolerance: nil,
+   allow_parallel: true,
+   force_parallel: false
+ )
+   lazy
+     .join_asof(
+       other.lazy,
+       left_on: left_on,
+       right_on: right_on,
+       on: on,
+       by_left: by_left,
+       by_right: by_right,
+       by: by,
+       strategy: strategy,
+       suffix: suffix,
+       tolerance: tolerance,
+       allow_parallel: allow_parallel,
+       force_parallel: force_parallel
+     )
+     .collect(no_optimization: true)
+ end
+
# Join in SQL-like fashion.
#
# @param other [DataFrame]
#   DataFrame to join with.
# @param left_on [Object]
@@ -1673,12 +2435,82 @@
      suffix: suffix,
    )
    .collect(no_optimization: true)
end

- # def apply
- # end
+ # Apply a custom/user-defined function (UDF) over the rows of the DataFrame.
+ #
+ # The UDF will receive each row as a tuple of values: `udf(row)`.
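+ # (In this Ruby port a row arrives as an Array, so `row[0]` is the first
+ # column's value, as the examples below show.)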
+ #
+ # Implementing logic using a Ruby function is almost always _significantly_
+ # slower and more memory intensive than implementing the same logic using
+ # the native expression API because:
+ #
+ # - The native expression engine runs in Rust; UDFs run in Ruby.
+ # - Use of Ruby UDFs forces the DataFrame to be materialized in memory.
+ # - Polars-native expressions can be parallelized (UDFs cannot).
+ # - Polars-native expressions can be logically optimized (UDFs cannot).
+ #
+ # Wherever possible you should strongly prefer the native expression API
+ # to achieve the best performance.
+ #
+ # @param return_dtype [Symbol]
+ #   Output type of the operation. If none given, Polars tries to infer the type.
+ # @param inference_size [Integer]
+ #   Only used in the case when the custom function returns rows.
+ #   This uses the first `n` rows to determine the output schema.
+ #
+ # @return [Object]
+ #
+ # @note
+ #   The frame-level `apply` cannot track column names (as the UDF is a black-box
+ #   that may arbitrarily drop, rearrange, transform, or add new columns); if you
+ #   want to apply a UDF such that column names are preserved, you should use the
+ #   expression-level `apply` syntax instead.
+ #
+ # @example
+ #   df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [-1, 5, 8]})
+ #
+ # @example Return a DataFrame by mapping each row to a tuple:
+ #   df.apply { |t| [t[0] * 2, t[1] * 3] }
+ #   # =>
+ #   # shape: (3, 2)
+ #   # ┌──────────┬──────────┐
+ #   # │ column_0 ┆ column_1 │
+ #   # │ ---      ┆ ---      │
+ #   # │ i64      ┆ i64      │
+ #   # ╞══════════╪══════════╡
+ #   # │ 2        ┆ -3       │
+ #   # ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 4        ┆ 15       │
+ #   # ├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ #   # │ 6        ┆ 24       │
+ #   # └──────────┴──────────┘
+ #
+ # @example Return a single-column DataFrame by mapping each row to a scalar:
+ #   df.apply { |t| t[0] * 2 + t[1] }
+ #   # =>
+ #   # shape: (3, 1)
+ #   # ┌───────┐
+ #   # │ apply │
+ #   # │ ---   │
+ #   # │ i64   │
+ #   # ╞═══════╡
+ #   # │ 1     │
+ #   # ├╌╌╌╌╌╌╌┤
+ #   # │ 9     │
+ #   # ├╌╌╌╌╌╌╌┤
+ #   # │ 14    │
+ #   # └───────┘
+ def apply(return_dtype: nil, inference_size: 256, &f)
+   out, is_df = _df.apply(f, return_dtype, inference_size)
+   if is_df
+     _from_rbdf(out)
+   else
+     _from_rbdf(Utils.wrap_s(out).to_frame._df)
+   end
+ end

# Return a new DataFrame with the column added or replaced.
#
# @param column [Object]
#   Series, where the name of the Series refers to the column in the DataFrame.
@@ -2176,22 +3008,409 @@
# # └─────────┴─────────┘
def explode(columns)
  lazy.explode(columns).collect(no_optimization: true)
end

- # def pivot
- # end
+ # Create a spreadsheet-style pivot table as a DataFrame.
+ #
+ # @param values [Object]
+ #   Column values to aggregate. Can be multiple columns if the *columns*
+ #   argument contains multiple columns as well.
+ # @param index [Object]
+ #   One or multiple keys to group by.
+ # @param columns [Object]
+ #   Columns whose values will be used as the header of the output DataFrame.
+ # @param aggregate_fn ["first", "sum", "max", "min", "mean", "median", "last", "count"]
+ #   A predefined aggregate function (given as a string), or an expression.
+ # @param maintain_order [Boolean]
+ #   Sort the grouped keys so that the output order is predictable.
+ # @param sort_columns [Boolean]
+ #   Sort the transposed columns by name. Default is by order of discovery.
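+ #
+ # @note The string shortcuts for `aggregate_fn` are mapped to expressions
+ #   internally (e.g. "sum" becomes `Polars.element.sum`, "count" becomes
+ #   `Polars.count`), so an expression may also be passed directly.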
+ #
+ # @return [DataFrame]
+ #
+ # @example
+ #   df = Polars::DataFrame.new(
+ #     {
+ #       "foo" => ["one", "one", "one", "two", "two", "two"],
+ #       "bar" => ["A", "B", "C", "A", "B", "C"],
+ #       "baz" => [1, 2, 3, 4, 5, 6]
+ #     }
+ #   )
+ #   df.pivot(values: "baz", index: "foo", columns: "bar")
+ #   # =>
+ #   # shape: (2, 4)
+ #   # ┌─────┬─────┬─────┬─────┐
+ #   # │ foo ┆ A   ┆ B   ┆ C   │
+ #   # │ --- ┆ --- ┆ --- ┆ --- │
+ #   # │ str ┆ i64 ┆ i64 ┆ i64 │
+ #   # ╞═════╪═════╪═════╪═════╡
+ #   # │ one ┆ 1   ┆ 2   ┆ 3   │
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ two ┆ 4   ┆ 5   ┆ 6   │
+ #   # └─────┴─────┴─────┴─────┘
+ def pivot(
+   values:,
+   index:,
+   columns:,
+   aggregate_fn: "first",
+   maintain_order: true,
+   sort_columns: false
+ )
+   if values.is_a?(String)
+     values = [values]
+   end
+   if index.is_a?(String)
+     index = [index]
+   end
+   if columns.is_a?(String)
+     columns = [columns]
+   end

+   if aggregate_fn.is_a?(String)
+     case aggregate_fn
+     when "first"
+       aggregate_fn = Polars.element.first
+     when "sum"
+       aggregate_fn = Polars.element.sum
+     when "max"
+       aggregate_fn = Polars.element.max
+     when "min"
+       aggregate_fn = Polars.element.min
+     when "mean"
+       aggregate_fn = Polars.element.mean
+     when "median"
+       aggregate_fn = Polars.element.median
+     when "last"
+       aggregate_fn = Polars.element.last
+     when "count"
+       aggregate_fn = Polars.count
+     else
+       raise ArgumentError, "Argument aggregate fn: '#{aggregate_fn}' was not expected."
+     end
+   end

+   _from_rbdf(
+     _df.pivot_expr(
+       values,
+       index,
+       columns,
+       aggregate_fn._rbexpr,
+       maintain_order,
+       sort_columns
+     )
+   )
+ end

- # def melt
- # end

- # def unstack
- # end

- # def partition_by
- # end
+ # Unpivot a DataFrame from wide to long format.
+ #
+ # Optionally leaves identifiers set.
+ #
+ # This function is useful to massage a DataFrame into a format where one or more
+ # columns are identifier variables (id_vars), while all other columns, considered
+ # measured variables (value_vars), are "unpivoted" to the row axis, leaving just
+ # two non-identifier columns, 'variable' and 'value'.
+ #
+ # @param id_vars [Object]
+ #   Columns to use as identifier variables.
+ # @param value_vars [Object]
+ #   Values to use as value variables.
+ #   If `value_vars` is empty all columns that are not in `id_vars` will be used.
+ # @param variable_name [String]
+ #   Name to give to the `variable` column. Defaults to "variable".
+ # @param value_name [String]
+ #   Name to give to the `value` column. Defaults to "value".
+ #
+ # @return [DataFrame]
+ #
+ # @example
+ #   df = Polars::DataFrame.new(
+ #     {
+ #       "a" => ["x", "y", "z"],
+ #       "b" => [1, 3, 5],
+ #       "c" => [2, 4, 6]
+ #     }
+ #   )
+ #   df.melt(id_vars: "a", value_vars: ["b", "c"])
+ #   # =>
+ #   # shape: (6, 3)
+ #   # ┌─────┬──────────┬───────┐
+ #   # │ a   ┆ variable ┆ value │
+ #   # │ --- ┆ ---      ┆ ---   │
+ #   # │ str ┆ str      ┆ i64   │
+ #   # ╞═════╪══════════╪═══════╡
+ #   # │ x   ┆ b        ┆ 1     │
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+ #   # │ y   ┆ b        ┆ 3     │
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+ #   # │ z   ┆ b        ┆ 5     │
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+ #   # │ x   ┆ c        ┆ 2     │
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+ #   # │ y   ┆ c        ┆ 4     │
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+ #   # │ z   ┆ c        ┆ 6     │
+ #   # └─────┴──────────┴───────┘
+ def melt(id_vars: nil, value_vars: nil, variable_name: nil, value_name: nil)
+   if value_vars.is_a?(String)
+     value_vars = [value_vars]
+   end
+   if id_vars.is_a?(String)
+     id_vars = [id_vars]
+   end
+   if value_vars.nil?
+     value_vars = []
+   end
+   if id_vars.nil?
+     id_vars = []
+   end
+   _from_rbdf(
+     _df.melt(id_vars, value_vars, value_name, variable_name)
+   )
+ end
+
+ # Unstack a long table to a wide form without doing an aggregation.
+ #
+ # This can be much faster than a pivot, because it can skip the grouping phase.
+ #
+ # @note
+ #   This functionality is experimental and may be subject to changes
+ #   without it being considered a breaking change.
+ #
+ # @param step [Integer]
+ #   Number of rows in the unstacked frame.
+ # @param how ["vertical", "horizontal"]
+ #   Direction of the unstack.
+ # @param columns [Object]
+ #   Columns to include in the operation.
+ # @param fill_values [Object]
+ #   Fill values that don't fit the new size with this value.
+ #
+ # @return [DataFrame]
+ #
+ # @example
+ #   df = Polars::DataFrame.new(
+ #     {
+ #       "col1" => "A".."I",
+ #       "col2" => Polars.arange(0, 9, eager: true)
+ #     }
+ #   )
+ #   # =>
+ #   # shape: (9, 2)
+ #   # ┌──────┬──────┐
+ #   # │ col1 ┆ col2 │
+ #   # │ ---  ┆ ---  │
+ #   # │ str  ┆ i64  │
+ #   # ╞══════╪══════╡
+ #   # │ A    ┆ 0    │
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+ #   # │ B    ┆ 1    │
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+ #   # │ C    ┆ 2    │
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+ #   # │ D    ┆ 3    │
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+ #   # │ ...  ┆ ...  │
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+ #   # │ F    ┆ 5    │
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+ #   # │ G    ┆ 6    │
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+ #   # │ H    ┆ 7    │
+ #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+ #   # │ I    ┆ 8    │
+ #   # └──────┴──────┘
+ #
+ # @example
+ #   df.unstack(step: 3, how: "vertical")
+ #   # =>
+ #   # shape: (3, 6)
+ #   # ┌────────┬────────┬────────┬────────┬────────┬────────┐
+ #   # │ col1_0 ┆ col1_1 ┆ col1_2 ┆ col2_0 ┆ col2_1 ┆ col2_2 │
+ #   # │ ---    ┆ ---    ┆ ---    ┆ ---    ┆ ---    ┆ ---    │
+ #   # │ str    ┆ str    ┆ str    ┆ i64    ┆ i64    ┆ i64    │
+ #   # ╞════════╪════════╪════════╪════════╪════════╪════════╡
+ #   # │ A      ┆ D      ┆ G      ┆ 0      ┆ 3      ┆ 6      │
+ #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ #   # │ B      ┆ E      ┆ H      ┆ 1      ┆ 4      ┆ 7      │
+ #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ #   # │ C      ┆ F      ┆ I      ┆ 2      ┆ 5      ┆ 8      │
+ #   # └────────┴────────┴────────┴────────┴────────┴────────┘
+ #
+ # @example
+ #   df.unstack(step: 3, how: "horizontal")
+ #   # =>
+ #   # shape: (3, 6)
+ #   # ┌────────┬────────┬────────┬────────┬────────┬────────┐
+ #   # │ col1_0 ┆ col1_1 ┆ col1_2 ┆ col2_0 ┆ col2_1 ┆ col2_2 │
+ #   # │ ---    ┆ ---    ┆ ---    ┆ ---    ┆ ---    ┆ ---    │
+ #   # │ str    ┆ str    ┆ str    ┆ i64    ┆ i64    ┆ i64    │
+ #   # ╞════════╪════════╪════════╪════════╪════════╪════════╡
+ #   # │ A      ┆ B      ┆ C      ┆ 0      ┆ 1      ┆ 2      │
+ #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ #   # │ D      ┆ E      ┆ F      ┆ 3      ┆ 4      ┆ 5      │
+ #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ #   # │ G      ┆ H      ┆ I      ┆ 6      ┆ 7      ┆ 8      │
+ #   # └────────┴────────┴────────┴────────┴────────┴────────┘
+ def unstack(step:, how: "vertical", columns: nil, fill_values: nil)
+   if !columns.nil?
+     df = select(columns)
+   else
+     df = self
+   end

+   height = df.height
+   if how == "vertical"
+     n_rows = step
+     n_cols = (height / n_rows.to_f).ceil
+   else
+     n_cols = step
+     n_rows = (height / n_cols.to_f).ceil
+   end

+   n_fill = n_cols * n_rows - height

+   if n_fill > 0
+     if !fill_values.is_a?(Array)
+       fill_values = [fill_values] * df.width
+     end

+     df = df.select(
+       df.get_columns.zip(fill_values).map do |s, next_fill|
+         s.extend_constant(next_fill, n_fill)
+       end
+     )
+   end

+   if how == "horizontal"
+     df = (
+       df.with_column(
+         (Polars.arange(0, n_cols * n_rows, eager: true) % n_cols).alias(
+           "__sort_order"
+         )
+       )
+       .sort("__sort_order")
+       .drop("__sort_order")
+     )
+   end

+   zfill_val = Math.log10(n_cols).floor + 1
+   slices =
+     df.get_columns.flat_map do |s|
+       n_cols.times.map do |slice_nbr|
+         s.slice(slice_nbr * n_rows, n_rows).alias("%s_%0#{zfill_val}d" % [s.name, slice_nbr])
+       end
+     end

+   _from_rbdf(DataFrame.new(slices)._df)
+ end
+
+ # Split into multiple DataFrames partitioned by groups.
+ #
+ # @param groups [Object]
+ #   Groups to partition by.
+ # @param maintain_order [Boolean]
+ #   Keep predictable output order. This is slower as it requires an extra sort
+ #   operation.
+ # @param as_dict [Boolean]
+ #   If true, return the partitions in a hash keyed by the distinct group
+ #   values instead of an array.
+ #
+ # @return [Object]
+ #
+ # @example
+ #   df = Polars::DataFrame.new(
+ #     {
+ #       "foo" => ["A", "A", "B", "B", "C"],
+ #       "N" => [1, 2, 2, 4, 2],
+ #       "bar" => ["k", "l", "m", "m", "l"]
+ #     }
+ #   )
+ #   df.partition_by("foo", maintain_order: true)
+ #   # =>
+ #   # [shape: (2, 3)
+ #   # ┌─────┬─────┬─────┐
+ #   # │ foo ┆ N   ┆ bar │
+ #   # │ --- ┆ --- ┆ --- │
+ #   # │ str ┆ i64 ┆ str │
+ #   # ╞═════╪═════╪═════╡
+ #   # │ A   ┆ 1   ┆ k   │
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ A   ┆ 2   ┆ l   │
+ #   # └─────┴─────┴─────┘, shape: (2, 3)
+ #   # ┌─────┬─────┬─────┐
+ #   # │ foo ┆ N   ┆ bar │
+ #   # │ --- ┆ --- ┆ --- │
+ #   # │ str ┆ i64 ┆ str │
+ #   # ╞═════╪═════╪═════╡
+ #   # │ B   ┆ 2   ┆ m   │
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ B   ┆ 4   ┆ m   │
+ #   # └─────┴─────┴─────┘, shape: (1, 3)
+ #   # ┌─────┬─────┬─────┐
+ #   # │ foo ┆ N   ┆ bar │
+ #   # │ --- ┆ --- ┆ --- │
+ #   # │ str ┆ i64 ┆ str │
+ #   # ╞═════╪═════╪═════╡
+ #   # │ C   ┆ 2   ┆ l   │
+ #   # └─────┴─────┴─────┘]
+ #
+ # @example
+ #   df.partition_by("foo", maintain_order: true, as_dict: true)
+ #   # =>
+ #   # {"A"=>shape: (2, 3)
+ #   # ┌─────┬─────┬─────┐
+ #   # │ foo ┆ N   ┆ bar │
+ #   # │ --- ┆ --- ┆ --- │
+ #   # │ str ┆ i64 ┆ str │
+ #   # ╞═════╪═════╪═════╡
+ #   # │ A   ┆ 1   ┆ k   │
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ A   ┆ 2   ┆ l   │
+ #   # └─────┴─────┴─────┘, "B"=>shape: (2, 3)
+ #   # ┌─────┬─────┬─────┐
+ #   # │ foo ┆ N   ┆ bar │
+ #   # │ --- ┆ --- ┆ --- │
+ #   # │ str ┆ i64 ┆ str │
+ #   # ╞═════╪═════╪═════╡
+ #   # │ B   ┆ 2   ┆ m   │
+ #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+ #   # │ B   ┆ 4   ┆ m   │
+ #   # └─────┴─────┴─────┘, "C"=>shape: (1, 3)
+ #   # ┌─────┬─────┬─────┐
+ #   # │ foo ┆ N   ┆ bar │
+ #   # │ --- ┆ --- ┆ --- │
+ #   # │ str ┆ i64 ┆ str │
+ #   # ╞═════╪═════╪═════╡
+ #   # │ C   ┆ 2   ┆ l   │
+ #   # └─────┴─────┴─────┘}
+ def partition_by(groups, maintain_order: true, as_dict: false)
+   if groups.is_a?(String)
+     groups = [groups]
+   elsif !groups.is_a?(Array)
+     groups = Array(groups)
+   end

+   if as_dict
+     out = {}
+     if groups.length == 1
+       _df.partition_by(groups, maintain_order).each do |df|
+         df = _from_rbdf(df)
+         out[df[groups][0, 0]] = df
+       end
+     else
+       _df.partition_by(groups, maintain_order).each do |df|
+         df = _from_rbdf(df)
+         out[df[groups].row(0)] = df
+       end
+     end
+     out
+   else
+     _df.partition_by(groups, maintain_order).map { |df| _from_rbdf(df) }
+   end
+ end
+
# Shift values by the given period.
#
# @param periods [Integer]
#   Number of places to shift (may be negative).
#
@@ -3059,13 +4278,98 @@
    n = 1
  end
  _from_rbdf(_df.sample_n(n, with_replacement, shuffle, seed))
end

- # def fold
- # end
+ # Apply a horizontal reduction on a DataFrame.
+ #
+ # This can be used to effectively determine aggregations on a row level, and can
+ # be applied to any DataType that can be supercast (cast to a similar parent
+ # type).
+ #
+ # Examples of the supercast rules when applying an arithmetic operation on two
+ # DataTypes:
+ #
+ # i8 + str = str
+ # f32 + i64 = f32
+ # f32 + f64 = f64
+ #
+ # @return [Series]
+ #
+ # @example A horizontal sum operation:
+ #   df = Polars::DataFrame.new(
+ #     {
+ #       "a" => [2, 1, 3],
+ #       "b" => [1, 2, 3],
+ #       "c" => [1.0, 2.0, 3.0]
+ #     }
+ #   )
+ #   df.fold { |s1, s2| s1 + s2 }
+ #   # =>
+ #   # shape: (3,)
+ #   # Series: 'a' [f64]
+ #   # [
+ #   #   4.0
+ #   #   5.0
+ #   #   9.0
+ #   # ]
+ #
+ # @example A horizontal minimum operation:
+ #   df = Polars::DataFrame.new({"a" => [2, 1, 3], "b" => [1, 2, 3], "c" => [1.0, 2.0, 3.0]})
+ #   df.fold { |s1, s2| s1.zip_with(s1 < s2, s2) }
+ #   # =>
+ #   # shape: (3,)
+ #   # Series: 'a' [f64]
+ #   # [
+ #   #   1.0
+ #   #   1.0
+ #   #   3.0
+ #   # ]
+ #
+ # @example A horizontal string concatenation:
+ #   df = Polars::DataFrame.new(
+ #     {
+ #       "a" => ["foo", "bar", 2],
+ #       "b" => [1, 2, 3],
+ #       "c" => [1.0, 2.0, 3.0]
+ #     }
+ #   )
+ #   df.fold { |s1, s2| s1 + s2 }
+ #   # =>
+ #   # shape: (3,)
+ #   # Series: 'a' [str]
+ #   # [
+ #   #   "foo11.0"
+ #   #   "bar22.0"
+ #   #   null
+ #   # ]
+ #
+ # @example A horizontal boolean or, similar to a row-wise .any():
+ #   df = Polars::DataFrame.new(
+ #     {
+ #       "a" => [false, false, true],
+ #       "b" => [false, true, false]
+ #     }
+ #   )
+ #   df.fold { |s1, s2| s1 | s2 }
+ #   # =>
+ #   # shape: (3,)
+ #   # Series: 'a' [bool]
+ #   # [
+ #   #   false
+ #   #   true
+ #   #   true
+ #   # ]
+ def fold(&operation)
+   acc = to_series(0)
+   1.upto(width - 1) do |i|
+     acc = operation.call(acc, to_series(i))
+   end
+   acc
+ end
+
# Get a row as tuple, either by index or by predicate.
#
# @param index [Object]
#   Row index.
# @param by_predicate [Object]
@@ -3169,12 +4473,49 @@
# # └─────┴─────┘
def take_every(n)
  select(Utils.col("*").take_every(n))
end

- # def hash_rows
- # end
+ # Hash and combine the rows in this DataFrame.
+ #
+ # The hash value is of type `:u64`.
+ #
+ # @param seed [Integer]
+ #   Random seed parameter. Defaults to 0.
+ # @param seed_1 [Integer]
+ #   Random seed parameter. Defaults to `seed` if not set.
+ # @param seed_2 [Integer]
+ #   Random seed parameter. Defaults to `seed` if not set.
+ # @param seed_3 [Integer]
+ #   Random seed parameter. Defaults to `seed` if not set.
+ #
+ # @return [Series]
+ #
+ # @example
+ #   df = Polars::DataFrame.new(
+ #     {
+ #       "foo" => [1, nil, 3, 4],
+ #       "ham" => ["a", "b", nil, "d"]
+ #     }
+ #   )
+ #   df.hash_rows(seed: 42)
+ #   # =>
+ #   # shape: (4,)
+ #   # Series: '' [u64]
+ #   # [
+ #   #   4238614331852490969
+ #   #   17976148875586754089
+ #   #   4702262519505526977
+ #   #   18144177983981041107
+ #   # ]
+ def hash_rows(seed: 0, seed_1: nil, seed_2: nil, seed_3: nil)
+   k0 = seed
+   k1 = seed_1.nil? ? seed : seed_1
+   k2 = seed_2.nil? ? seed : seed_2
+   k3 = seed_3.nil? ? seed : seed_3
+   Utils.wrap_s(_df.hash_rows(k0, k1, k2, k3))
+ end

# Interpolate intermediate values. The interpolation method is linear.
#
# @return [DataFrame]
#
@@ -3295,11 +4636,23 @@
def initialize_copy(other)
  super
  self._df = _df._clone
end

- def hash_to_rbdf(data, columns: nil)
+ def _pos_idx(idx, dim)
+   if idx >= 0
+     idx
+   else
+     shape[dim] + idx
+   end
+ end
+
+ # def _pos_idxs
+ # end
+
+ # @private
+ def self.hash_to_rbdf(data, columns: nil)
  if !columns.nil?
    columns, dtypes = _unpack_columns(columns, lookup_names: data.keys)
    if data.empty? && dtypes
      data_series = columns.map { |name| Series.new(name, [], dtype: dtypes[name])._s }
@@ -3311,15 +4664,38 @@
  end

  RbDataFrame.read_hash(data)
end

- def _unpack_columns(columns, lookup_names: nil)
-   [columns.keys, columns]
+ # @private
+ def self._unpack_columns(columns, lookup_names: nil, n_expected: nil)
+   if columns.is_a?(Hash)
+     columns = columns.to_a
+   end
+   column_names =
+     (columns || []).map.with_index do |col, i|
+       if col.is_a?(String)
+         col || "column_#{i}"
+       else
+         col[0]
+       end
+     end
+   if column_names.empty? && n_expected
+     column_names = n_expected.times.map { |i| "column_#{i}" }
+   end
+   # TODO zip_longest
+   lookup = column_names.zip(lookup_names || []).to_h
+
+   [
+     column_names,
+     (columns || []).select { |col| !col.is_a?(String) && col[1] }.to_h do |col|
+       [lookup[col[0]] || col[0], col[1]]
+     end
+   ]
end

- def _handle_columns_arg(data, columns: nil)
+ def self._handle_columns_arg(data, columns: nil)
  if columns.nil?
    data
  else
    if data.empty?
      columns.map { |c| Series.new(c, nil)._s }
@@ -3333,17 +4709,42 @@
      raise ArgumentError, "Dimensions of columns arg must match data dimensions."
    end
  end
end

- def sequence_to_rbdf(data, columns: nil, orient: nil)
-   if columns || orient
-     raise Todo
+ # @private
+ def self.sequence_to_rbdf(data, columns: nil, orient: nil)
+   if data.length == 0
+     return hash_to_rbdf({}, columns: columns)
    end
-   RbDataFrame.new(data.map(&:_s))
+
+   if data[0].is_a?(Series)
+     # series_names = data.map(&:name)
+     # columns, dtypes = _unpack_columns(columns || series_names, n_expected: data.length)
+     data_series = []
+     data.each do |s|
+       data_series << s._s
+     end
+   elsif data[0].is_a?(Array)
+     if orient.nil? && !columns.nil?
+       orient = columns.length == data.length ? "col" : "row"
+     end
+
+     if orient == "row"
+       raise Todo
+     elsif orient == "col" || orient.nil?
+       raise Todo
+     else
+       raise ArgumentError, "orient must be one of {'col', 'row', nil}, got #{orient} instead."
+     end
+   end
+
+   data_series = _handle_columns_arg(data_series, columns: columns)
+   RbDataFrame.new(data_series)
end

- def series_to_rbdf(data, columns: nil)
+ # @private
+ def self.series_to_rbdf(data, columns: nil)
  if columns
    raise Todo
  end
  RbDataFrame.new([data._s])
end
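A quick round trip through the surface added in 0.1.5 — a sketch only: it assumes a writable /tmp path and a public `Polars.read_avro` front-end for the private `_read_avro` shown in this diff.

    require "polars-df"

    df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
    df.to_hashes                        # => [{"foo"=>1, "bar"=>4}, {"foo"=>2, "bar"=>5}, {"foo"=>3, "bar"=>6}]
    df.write_avro("/tmp/example.avro")  # compression defaults to "uncompressed"
    Polars.read_avro("/tmp/example.avro").to_hashes == df.to_hashes # => true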