data_frame.rb in polars-df-0.1.3

- old
+ new

@@ -153,16 +153,39 @@
         )
       )
     end
 
     # @private
-    def self._read_parquet(file)
+    def self._read_parquet( # builds a DataFrame from RbDataFrame.read_parquet; options are forwarded positionally
+      file,
+      columns: nil,
+      n_rows: nil,
+      parallel: "auto",
+      row_count_name: nil,
+      row_count_offset: 0,
+      low_memory: false
+    )
       # String and Pathname inputs are passed through Utils.format_path; any other object is left as given
       if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
         file = Utils.format_path(file)
       end
 
-      _from_rbdf(RbDataFrame.read_parquet(file))
+      if file.is_a?(String) && file.include?("*") # paths containing "*" are not handled yet
+        raise Todo
+      end
+
+      projection, columns = Utils.handle_projection_columns(columns) # NOTE(review): presumably splits index-based vs name-based selection - confirm in Utils
+      _from_rbdf(
+        RbDataFrame.read_parquet(
+          file,
+          columns,
+          projection,
+          n_rows,
+          parallel,
+          Utils._prepare_row_count_args(row_count_name, row_count_offset),
+          low_memory
+        )
+      )
     end
 
     # def self._read_avro
     # end
 
@@ -257,15 +280,17 @@
     # Get column names.
     #
     # @return [Array]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "foo" => [1, 2, 3],
-    #     "bar" => [6, 7, 8],
-    #     "ham" => ["a", "b", "c"]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
     #   df.columns
     #   # => ["foo", "bar", "ham"]
     def columns
       # The native frame is the single source of truth for column names.
       native = _df
       native.columns
     end
@@ -277,15 +302,17 @@
     #   The length of the list should be equal to the width of the DataFrame.
     #
     # @return [Object]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "foo" => [1, 2, 3],
-    #     "bar" => [6, 7, 8],
-    #     "ham" => ["a", "b", "c"]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
     #   df.columns = ["apple", "banana", "orange"]
     #   df
     #   # =>
     #   # shape: (3, 3)
     #   # ┌───────┬────────┬────────┐
@@ -306,15 +333,17 @@
     # Get dtypes of columns in DataFrame. Dtypes can also be found in column headers when printing the DataFrame.
     #
     # @return [Array]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "foo" => [1, 2, 3],
-    #     "bar" => [6.0, 7.0, 8.0],
-    #     "ham" => ["a", "b", "c"]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
     #   df.dtypes
     #   # => [:i64, :f64, :str]
     def dtypes
       # Dtypes are reported by the native frame, one entry per column.
       native = _df
       native.dtypes
     end
@@ -322,60 +351,136 @@
     # Get the schema.
     #
     # @return [Hash]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "foo" => [1, 2, 3],
-    #     "bar" => [6.0, 7.0, 8.0],
-    #     "ham" => ["a", "b", "c"]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
     #   df.schema
     #   # => {"foo"=>:i64, "bar"=>:f64, "ham"=>:str}
     def schema
       # Pair each column name with the dtype found at the same position.
       names = columns
       types = dtypes
       names.each_with_index.to_h { |name, position| [name, types[position]] }
     end
 
-    # def ==(other)
-    # end
+    # Equal.
+    #
+    # @return [DataFrame]
+    def ==(other) # forwards to _comp with the "eq" operator name
+      _comp(other, "eq")
+    end
 
-    # def !=(other)
-    # end
+    # Not equal.
+    #
+    # @return [DataFrame]
+    def !=(other) # forwards to _comp with the "neq" operator name
+      _comp(other, "neq")
+    end
 
-    # def >(other)
-    # end
+    # Greater than.
+    #
+    # @return [DataFrame]
+    def >(other) # forwards to _comp with the "gt" operator name
+      _comp(other, "gt")
+    end
 
-    # def <(other)
-    # end
+    # Less than.
+    #
+    # @return [DataFrame]
+    def <(other) # forwards to _comp with the "lt" operator name
+      _comp(other, "lt")
+    end
 
-    # def >=(other)
-    # end
+    # Greater than or equal.
+    #
+    # @return [DataFrame]
+    def >=(other) # forwards to _comp with the "gt_eq" operator name
+      _comp(other, "gt_eq")
+    end
 
-    # def <=(other)
-    # end
+    # Less than or equal.
+    #
+    # @return [DataFrame]
+    def <=(other) # forwards to _comp with the "lt_eq" operator name
+      _comp(other, "lt_eq")
+    end
 
-    # def *(other)
-    # end
+    # Performs multiplication.
+    #
+    # @return [DataFrame]
+    def *(other)
+      if other.is_a?(DataFrame) # frame-by-frame operands use the native mul_df
+        return _from_rbdf(_df.mul_df(other._df))
+      end
 
-    # def /(other)
-    # end
+      other = _prepare_other_arg(other) # any other operand goes through _prepare_other_arg; its result must respond to _s
+      _from_rbdf(_df.mul(other._s))
+    end
 
-    # def +(other)
-    # end
+    # Performs division.
+    #
+    # @return [DataFrame]
+    def /(other)
+      if other.is_a?(DataFrame) # frame-by-frame operands use the native div_df
+        return _from_rbdf(_df.div_df(other._df))
+      end
 
-    # def -(other)
-    # end
+      other = _prepare_other_arg(other) # any other operand goes through _prepare_other_arg; its result must respond to _s
+      _from_rbdf(_df.div(other._s))
+    end
 
-    # def %(other)
-    # end
+    # Performs addition.
+    #
+    # @return [DataFrame]
+    def +(other)
+      if other.is_a?(DataFrame) # frame-by-frame operands use the native add_df
+        return _from_rbdf(_df.add_df(other._df))
+      end
 
+      other = _prepare_other_arg(other) # any other operand goes through _prepare_other_arg; its result must respond to _s
+      _from_rbdf(_df.add(other._s))
+    end
+
+    # Performs subtraction.
     #
+    # @return [DataFrame]
+    def -(other)
+      if other.is_a?(DataFrame) # frame-by-frame operands use the native sub_df
+        return _from_rbdf(_df.sub_df(other._df))
+      end
+
+      other = _prepare_other_arg(other) # any other operand goes through _prepare_other_arg; its result must respond to _s
+      _from_rbdf(_df.sub(other._s))
+    end
+
+    # Returns the modulo.
+    #
+    # @return [DataFrame]
+    def %(other)
+      if other.is_a?(DataFrame) # frame-by-frame operands use the native rem_df
+        return _from_rbdf(_df.rem_df(other._df))
+      end
+
+      other = _prepare_other_arg(other) # any other operand goes through _prepare_other_arg; its result must respond to _s
+      _from_rbdf(_df.rem(other._s))
+    end
+
+    # Returns a string representing the DataFrame.
+    #
+    # @return [String]
     def to_s
       # The printable form is produced entirely by the native frame.
       rendered = _df.to_s
       rendered
     end
     alias_method :inspect, :to_s
 
+    # Check if DataFrame includes column.
+    #
+    # @return [Boolean]
     def include?(name)
       # Membership is decided against the list of column names.
       names = columns
       names.include?(name)
     end
 
     # def each
@@ -385,21 +490,92 @@
     # end
 
     # def _pos_idxs
     # end
 
+    # Returns subset of the DataFrame.
     #
-    def [](name)
-      Utils.wrap_s(_df.column(name))
+    # @return [Object]
+    def [](*args)
+      if args.size == 2
+        row_selection, col_selection = args
+
+        # df[.., unknown]
+        if row_selection.is_a?(Range)
+
+          # multiple slices
+          # df[.., ..]
+          if col_selection.is_a?(Range)
+            raise Todo
+          end
+        end
+
+        # df[2, ..] (select row as df)
+        if row_selection.is_a?(Integer)
+          if col_selection.is_a?(Array)
+            df = self[0.., col_selection]
+            return df.slice(row_selection, 1)
+          end
+          # df[2, "a"]
+          if col_selection.is_a?(String)
+            return self[col_selection][row_selection]
+          end
+        end
+
+        # column selection can be "a" and ["a", "b"]
+        if col_selection.is_a?(String)
+          col_selection = [col_selection]
+        end
+
+        # df[.., 1]
+        if col_selection.is_a?(Integer)
+          series = to_series(col_selection)
+          return series[row_selection]
+        end
+
+        if col_selection.is_a?(Array)
+          # df[.., [1, 2]]
+          if is_int_sequence(col_selection)
+            series_list = col_selection.map { |i| to_series(i) }
+            df = self.class.new(series_list)
+            return df[row_selection]
+          end
+        end
+
+        df = self[col_selection]
+        return df[row_selection]
+      elsif args.size == 1
+        item = args[0]
+
+        # select single column
+        # df["foo"]
+        if item.is_a?(String)
+          return Utils.wrap_s(_df.column(item))
+        end
+
+        # df[idx]
+        if item.is_a?(Integer)
+          return slice(_pos_idx(item, dim: 0), 1)
+        end
+
+        # df[..]
+        if item.is_a?(Range)
+          return Slice.new(self).apply(item)
+        end
+      end
+
+      raise ArgumentError, "Cannot get item of type: #{item.class.name}"
     end
 
     # def []=(key, value)
     # end
 
     # no to_arrow
 
+    # Convert DataFrame to a hash mapping column name to values.
     #
+    # @return [Hash]
     def to_h(as_series: true)
       if as_series
         get_columns.to_h { |s| [s.name, s] }
       else
         get_columns.to_h { |s| [s.name, s.to_a] }
@@ -420,15 +596,17 @@
     #   Location of selection.
     #
     # @return [Series]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "foo" => [1, 2, 3],
-    #     "bar" => [6, 7, 8],
-    #     "ham" => ["a", "b", "c"]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
     #   df.to_series(1)
     #   # =>
     #   # shape: (3,)
     #   # Series: 'bar' [i64]
     #   # [
@@ -517,15 +695,17 @@
     #   A string representing null values (defaulting to the empty string).
     #
     # @return [String, nil]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "foo" => [1, 2, 3, 4, 5],
-    #     "bar" => [6, 7, 8, 9, 10],
-    #     "ham" => ["a", "b", "c", "d", "e"]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3, 4, 5],
+    #       "bar" => [6, 7, 8, 9, 10],
+    #       "ham" => ["a", "b", "c", "d", "e"]
+    #     }
+    #   )
     #   df.write_csv("file.csv")
     def write_csv(
       file = nil,
       has_header: true,
       sep: ",",
@@ -692,14 +872,16 @@
     # Reverse the DataFrame.
     #
     # @return [DataFrame]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "key" => ["a", "b", "c"],
-    #     "val" => [1, 2, 3]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "key" => ["a", "b", "c"],
+    #       "val" => [1, 2, 3]
+    #     }
+    #   )
     #   df.reverse()
     #   # =>
     #   # shape: (3, 2)
     #   # ┌─────┬─────┐
     #   # │ key ┆ val │
@@ -722,15 +904,17 @@
     #   Key value pairs that map from old name to new name.
     #
     # @return [DataFrame]
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "foo" => [1, 2, 3],
-    #     "bar" => [6, 7, 8],
-    #     "ham" => ["a", "b", "c"]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
     #   df.rename({"foo" => "apple"})
     #   # =>
     #   # shape: (3, 3)
     #   # ┌───────┬─────┬─────┐
     #   # │ apple ┆ bar ┆ ham │
@@ -773,15 +957,17 @@
     #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
     #   # │ 3   ┆ 99  ┆ 6   │
     #   # └─────┴─────┴─────┘
     #
     # @example
-    #   df = Polars::DataFrame.new({
-    #     "a" => [1, 2, 3, 4],
-    #     "b" => [0.5, 4, 10, 13],
-    #     "c" => [true, true, false, true]
-    #   })
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3, 4],
+    #       "b" => [0.5, 4, 10, 13],
+    #       "c" => [true, true, false, true]
+    #     }
+    #   )
     #   s = Polars::Series.new("d", [-2.5, 15, 20.5, 0])
     #   df.insert_at_idx(3, s)
     #   # =>
     #   # shape: (4, 4)
     #   # ┌─────┬──────┬───────┬──────┐
@@ -803,67 +989,564 @@
       end
       _df.insert_at_idx(index, series._s)
       self
     end
 
+    # Filter the rows in the DataFrame based on a predicate expression.
+    #
+    # @param predicate [Expr]
+    #   Expression that evaluates to a boolean Series.
+    #
+    # @return [DataFrame]
+    #
+    # @example Filter on one condition:
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.filter(Polars.col("foo") < 3)
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   ┆ b   │
+    #   # └─────┴─────┴─────┘
+    #
+    # @example Filter on multiple conditions:
+    #   df.filter((Polars.col("foo") < 3) & (Polars.col("ham") == "a"))
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # └─────┴─────┴─────┘
     def filter(predicate)
       # Apply the predicate on the lazy frame, then materialize the result.
       query = lazy.filter(predicate)
       query.collect
     end
 
-    # def describe
-    # end
+    # Summary statistics for a DataFrame.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1.0, 2.8, 3.0],
+    #       "b" => [4, 5, nil],
+    #       "c" => [true, false, true],
+    #       "d" => [nil, "b", "c"],
+    #       "e" => ["usd", "eur", nil]
+    #     }
+    #   )
+    #   df.describe
+    #   # =>
+    #   # shape: (7, 6)
+    #   # ┌────────────┬──────────┬──────────┬──────┬──────┬──────┐
+    #   # │ describe   ┆ a        ┆ b        ┆ c    ┆ d    ┆ e    │
+    #   # │ ---        ┆ ---      ┆ ---      ┆ ---  ┆ ---  ┆ ---  │
+    #   # │ str        ┆ f64      ┆ f64      ┆ f64  ┆ str  ┆ str  │
+    #   # ╞════════════╪══════════╪══════════╪══════╪══════╪══════╡
+    #   # │ count      ┆ 3.0      ┆ 3.0      ┆ 3.0  ┆ 3    ┆ 3    │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ null_count ┆ 0.0      ┆ 1.0      ┆ 0.0  ┆ 1    ┆ 1    │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ mean       ┆ 2.266667 ┆ 4.5      ┆ null ┆ null ┆ null │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ std        ┆ 1.101514 ┆ 0.707107 ┆ null ┆ null ┆ null │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ min        ┆ 1.0      ┆ 4.0      ┆ 0.0  ┆ b    ┆ eur  │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ max        ┆ 3.0      ┆ 5.0      ┆ 1.0  ┆ c    ┆ usd  │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ median     ┆ 2.8      ┆ 4.5      ┆ null ┆ null ┆ null │
+    #   # └────────────┴──────────┴──────────┴──────┴──────┴──────┘
+    def describe
+      describe_cast = lambda do |stat| # re-types every column of stat according to the matching column of self
+        columns = []
+        self.columns.each_with_index do |s, i|
+          if self[s].is_numeric || self[s].is_boolean # numeric and boolean columns are reported as f64
+            columns << stat[0.., i].cast(:f64)
+          else
+            # for dates, strings, etc, we cast to string so that all
+            # statistics can be shown
+            columns << stat[0.., i].cast(:str)
+          end
+        end
+        self.class.new(columns)
+      end
 
-    # def find_idx_by_name
-    # end
+      summary = _from_rbdf( # statistics are concatenated in the same order as the labels inserted below
+        Polars.concat(
+          [
+            describe_cast.(
+              self.class.new(columns.to_h { |c| [c, [height]] })
+            ),
+            describe_cast.(null_count),
+            describe_cast.(mean),
+            describe_cast.(std),
+            describe_cast.(min),
+            describe_cast.(max),
+            describe_cast.(median)
+          ]
+        )._df
+      )
+      summary.insert_at_idx( # the label column is placed first
+        0,
+        Polars::Series.new(
+          "describe",
+          ["count", "null_count", "mean", "std", "min", "max", "median"],
+        )
+      )
+      summary
+    end
 
-    # def replace_at_idx
-    # end
+    # Find the index of a column by name.
+    #
+    # @param name [String]
+    #   Name of the column to find.
+    #
+    # @return [Integer]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {"foo" => [1, 2, 3], "bar" => [6, 7, 8], "ham" => ["a", "b", "c"]}
+    #   )
+    #   df.find_idx_by_name("ham")
+    #   # => 2
+    def find_idx_by_name(name)
+      _df.find_idx_by_name(name) # answered directly by the native frame
+    end
 
+    # Replace a column at an index location.
     #
+    # @param index [Integer]
+    #   Column index.
+    # @param series [Series]
+    #   Series that will replace the column.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   s = Polars::Series.new("apple", [10, 20, 30])
+    #   df.replace_at_idx(0, s)
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌───────┬─────┬─────┐
+    #   # │ apple ┆ bar ┆ ham │
+    #   # │ ---   ┆ --- ┆ --- │
+    #   # │ i64   ┆ i64 ┆ str │
+    #   # ╞═══════╪═════╪═════╡
+    #   # │ 10    ┆ 6   ┆ a   │
+    #   # ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 20    ┆ 7   ┆ b   │
+    #   # ├╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 30    ┆ 8   ┆ c   │
+    #   # └───────┴─────┴─────┘
+    def replace_at_idx(index, series)
+      if index < 0 # negative indexes count back from the number of columns
+        index = columns.length + index
+      end
+      _df.replace_at_idx(index, series._s) # the native frame receives the Series' native handle (_s)
+      self # the receiver itself is returned, not a copy
+    end
+
+    # Sort the DataFrame by column.
+    #
+    # @param by [String, Expr, Array]
+    #   By which column to sort.
+    # @param reverse [Boolean]
+    #   Reverse/descending sort.
+    # @param nulls_last [Boolean]
+    #   Place null values last. Can only be used if sorted by a single column.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.sort("foo", reverse: true)
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ f64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 3   ┆ 8.0 ┆ c   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 ┆ b   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 1   ┆ 6.0 ┆ a   │
+    #   # └─────┴─────┴─────┘
+    #
+    # @example Sort by multiple columns.
+    #   df.sort(
+    #     [Polars.col("foo"), Polars.col("bar")**2],
+    #     reverse: [true, false]
+    #   )
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ f64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 3   ┆ 8.0 ┆ c   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 ┆ b   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 1   ┆ 6.0 ┆ a   │
+    #   # └─────┴─────┴─────┘
     def sort(by, reverse: false, nulls_last: false)
-      _from_rbdf(_df.sort(by, reverse, nulls_last))
+      if by.is_a?(Array) || by.is_a?(Expr) # lists and expressions are sorted through the lazy API
+        lazy
+          .sort(by, reverse: reverse, nulls_last: nulls_last)
+          .collect(no_optimization: true, string_cache: false)
+      else # anything else (e.g. a single column name) goes straight to the native sort
+        _from_rbdf(_df.sort(by, reverse, nulls_last))
+      end
     end
 
+    # Check if DataFrame is equal to other.
+    #
+    # @param other [DataFrame]
+    #   DataFrame to compare with.
+    # @param null_equal [Boolean]
+    #   Consider null values as equal.
+    #
+    # @return [Boolean]
+    #
+    # @example
+    #   df1 = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df2 = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [3, 2, 1],
+    #       "bar" => [8.0, 7.0, 6.0],
+    #       "ham" => ["c", "b", "a"]
+    #     }
+    #   )
+    #   df1.frame_equal(df1)
+    #   # => true
+    #   df1.frame_equal(df2)
+    #   # => false
     def frame_equal(other, null_equal: true)
       # Compare the two native frames; the null handling flag is forwarded as is.
       mine = _df
       theirs = other._df
       mine.frame_equal(theirs, null_equal)
     end
 
-    # def replace
-    # end
+    # Replace a column by a new Series.
+    #
+    # @param column [String]
+    #   Column to replace.
+    # @param new_col [Series]
+    #   New column to insert.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
+    #   s = Polars::Series.new([10, 20, 30])
+    #   df.replace("foo", s)
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ foo ┆ bar │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 10  ┆ 4   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 20  ┆ 5   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 30  ┆ 6   │
+    #   # └─────┴─────┘
+    def replace(column, new_col)
+      _df.replace(column, new_col._s) # the native frame receives the Series' native handle (_s)
+      self # the receiver itself is returned, not a copy
+    end
 
+    # Get a slice of this DataFrame.
     #
+    # @param offset [Integer]
+    #   Start index. Negative indexing is supported.
+    # @param length [Integer, nil]
+    #   Length of the slice. If set to `nil`, all rows starting at the offset
+    #   will be selected.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.slice(1, 2)
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ f64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 2   ┆ 7.0 ┆ b   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8.0 ┆ c   │
+    #   # └─────┴─────┴─────┘
     def slice(offset, length = nil)
       # A negative length is converted to a positive one relative to the
       # frame height and the offset; nil is handed to the native call as is.
       negative_length = !length.nil? && length < 0
       length = height - offset + length if negative_length

       window = _df.slice(offset, length)
       _from_rbdf(window)
     end
 
+    # Get the first `n` rows.
+    #
+    # Alias for {#head}.
+    #
+    # @param n [Integer]
+    #   Number of rows to return.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {"foo" => [1, 2, 3, 4, 5, 6], "bar" => ["a", "b", "c", "d", "e", "f"]}
+    #   )
+    #   df.limit(4)
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌─────┬─────┐
+    #   # │ foo ┆ bar │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ str │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ b   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ c   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 4   ┆ d   │
+    #   # └─────┴─────┘
     def limit(n = 5)
       # Same rows as #head; only the method name differs.
       leading_rows = head(n)
       leading_rows
     end
 
+    # Get the first `n` rows.
+    #
+    # @param n [Integer]
+    #   Number of rows to return.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3, 4, 5],
+    #       "bar" => [6, 7, 8, 9, 10],
+    #       "ham" => ["a", "b", "c", "d", "e"]
+    #     }
+    #   )
+    #   df.head(3)
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   ┆ b   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8   ┆ c   │
+    #   # └─────┴─────┴─────┘
     def head(n = 5)
       # Take the leading rows natively, then wrap them via _from_rbdf.
       leading = _df.head(n)
       _from_rbdf(leading)
     end
 
+    # Get the last `n` rows.
+    #
+    # @param n [Integer]
+    #   Number of rows to return.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3, 4, 5],
+    #       "bar" => [6, 7, 8, 9, 10],
+    #       "ham" => ["a", "b", "c", "d", "e"]
+    #     }
+    #   )
+    #   df.tail(3)
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 3   ┆ 8   ┆ c   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 4   ┆ 9   ┆ d   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 5   ┆ 10  ┆ e   │
+    #   # └─────┴─────┴─────┘
     def tail(n = 5)
       # Take the trailing rows natively, then wrap them via _from_rbdf.
       trailing = _df.tail(n)
       _from_rbdf(trailing)
     end
 
-    # def drop_nulls
-    # end
+    # Return a new DataFrame where the null values are dropped.
+    #
+    # @param subset [Object]
+    #   Subset of column(s) on which `drop_nulls` will be applied.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, nil, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.drop_nulls
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8   ┆ c   │
+    #   # └─────┴─────┴─────┘
+    def drop_nulls(subset: nil)
+      if subset.is_a?(String) # a single column name becomes a one-element list
+        subset = [subset]
+      end
+      _from_rbdf(_df.drop_nulls(subset)) # any other value, including the nil default, is passed through unchanged
+    end
 
     # def pipe
     # end
 
-    # def with_row_count
-    # end
+    # Add a column at index 0 that counts the rows.
+    #
+    # @param name [String]
+    #   Name of the column to add.
+    # @param offset [Integer]
+    #   Start the row count at this offset.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 3, 5],
+    #       "b" => [2, 4, 6]
+    #     }
+    #   )
+    #   df.with_row_count
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌────────┬─────┬─────┐
+    #   # │ row_nr ┆ a   ┆ b   │
+    #   # │ ---    ┆ --- ┆ --- │
+    #   # │ u32    ┆ i64 ┆ i64 │
+    #   # ╞════════╪═════╪═════╡
+    #   # │ 0      ┆ 1   ┆ 2   │
+    #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 1      ┆ 3   ┆ 4   │
+    #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2      ┆ 5   ┆ 6   │
+    #   # └────────┴─────┴─────┘
+    def with_row_count(name: "row_nr", offset: 0)
+      _from_rbdf(_df.with_row_count(name, offset)) # keyword arguments are handed to the native call positionally
+    end
 
+    # Start a groupby operation.
     #
+    # @param by [Object]
+    #   Column(s) to group by.
+    # @param maintain_order [Boolean]
+    #   Make sure that the order of the groups remains consistent. This is more
+    #   expensive than a default groupby. Note that this only works in expression
+    #   aggregations.
+    #
+    # @return [GroupBy]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => ["a", "b", "a", "b", "b", "c"],
+    #       "b" => [1, 2, 3, 4, 5, 6],
+    #       "c" => [6, 5, 4, 3, 2, 1]
+    #     }
+    #   )
+    #   df.groupby("a").agg(Polars.col("b").sum).sort("a")
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ str ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ a   ┆ 4   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ b   ┆ 11  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ c   ┆ 6   │
+    #   # └─────┴─────┘
     def groupby(by, maintain_order: false)
-      lazy.groupby(by, maintain_order: maintain_order)
+      if !Utils.bool?(maintain_order) # reject any value Utils.bool? does not accept
+        raise TypeError, "invalid input for groupby arg `maintain_order`: #{maintain_order}."
+      end
+      if by.is_a?(String) # a single column name becomes a one-element list
+        by = [by]
+      end
+      GroupBy.new( # built from the native frame, the keys and this DataFrame class
+        _df,
+        by,
+        self.class,
+        maintain_order: maintain_order
+      )
     end
 
     # def groupby_rolling
     # end
 
@@ -874,11 +1557,113 @@
     # end
 
     # def join_asof
     # end
 
+    # Join in SQL-like fashion.
     #
+    # @param other [DataFrame]
+    #   DataFrame to join with.
+    # @param left_on [Object]
+    #   Name(s) of the left join column(s).
+    # @param right_on [Object]
+    #   Name(s) of the right join column(s).
+    # @param on [Object]
+    #   Name(s) of the join columns in both DataFrames.
+    # @param how ["inner", "left", "outer", "semi", "anti", "cross"]
+    #   Join strategy.
+    # @param suffix [String]
+    #   Suffix to append to columns with a duplicate name.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   other_df = Polars::DataFrame.new(
+    #     {
+    #       "apple" => ["x", "y", "z"],
+    #       "ham" => ["a", "b", "d"]
+    #     }
+    #   )
+    #   df.join(other_df, on: "ham")
+    #   # =>
+    #   # shape: (2, 4)
+    #   # ┌─────┬─────┬─────┬───────┐
+    #   # │ foo ┆ bar ┆ ham ┆ apple │
+    #   # │ --- ┆ --- ┆ --- ┆ ---   │
+    #   # │ i64 ┆ f64 ┆ str ┆ str   │
+    #   # ╞═════╪═════╪═════╪═══════╡
+    #   # │ 1   ┆ 6.0 ┆ a   ┆ x     │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 ┆ b   ┆ y     │
+    #   # └─────┴─────┴─────┴───────┘
+    #
+    # @example
+    #   df.join(other_df, on: "ham", how: "outer")
+    #   # =>
+    #   # shape: (4, 4)
+    #   # ┌──────┬──────┬─────┬───────┐
+    #   # │ foo  ┆ bar  ┆ ham ┆ apple │
+    #   # │ ---  ┆ ---  ┆ --- ┆ ---   │
+    #   # │ i64  ┆ f64  ┆ str ┆ str   │
+    #   # ╞══════╪══════╪═════╪═══════╡
+    #   # │ 1    ┆ 6.0  ┆ a   ┆ x     │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2    ┆ 7.0  ┆ b   ┆ y     │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ null ┆ null ┆ d   ┆ z     │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 3    ┆ 8.0  ┆ c   ┆ null  │
+    #   # └──────┴──────┴─────┴───────┘
+    #
+    # @example
+    #   df.join(other_df, on: "ham", how: "left")
+    #   # =>
+    #   # shape: (3, 4)
+    #   # ┌─────┬─────┬─────┬───────┐
+    #   # │ foo ┆ bar ┆ ham ┆ apple │
+    #   # │ --- ┆ --- ┆ --- ┆ ---   │
+    #   # │ i64 ┆ f64 ┆ str ┆ str   │
+    #   # ╞═════╪═════╪═════╪═══════╡
+    #   # │ 1   ┆ 6.0 ┆ a   ┆ x     │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 ┆ b   ┆ y     │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 3   ┆ 8.0 ┆ c   ┆ null  │
+    #   # └─────┴─────┴─────┴───────┘
+    #
+    # @example
+    #   df.join(other_df, on: "ham", how: "semi")
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ f64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6.0 ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 ┆ b   │
+    #   # └─────┴─────┴─────┘
+    #
+    # @example
+    #   df.join(other_df, on: "ham", how: "anti")
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ f64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 3   ┆ 8.0 ┆ c   │
+    #   # └─────┴─────┴─────┘
     def join(other, left_on: nil, right_on: nil, on: nil, how: "inner", suffix: "_right")
       lazy
         .join(
           other.lazy,
           left_on: left_on,
@@ -891,59 +1676,412 @@
     end
 
     # def apply
     # end
 
+    # Return a new DataFrame with the column added or replaced.
     #
+    # @param column [Object]
+    #   Series, where the name of the Series refers to the column in the DataFrame.
+    #
+    # @return [DataFrame]
+    #
+    # @example Added
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 3, 5],
+    #       "b" => [2, 4, 6]
+    #     }
+    #   )
+    #   df.with_column((Polars.col("b") ** 2).alias("b_squared"))
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬───────────┐
+    #   # │ a   ┆ b   ┆ b_squared │
+    #   # │ --- ┆ --- ┆ ---       │
+    #   # │ i64 ┆ i64 ┆ f64       │
+    #   # ╞═════╪═════╪═══════════╡
+    #   # │ 1   ┆ 2   ┆ 4.0       │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ 3   ┆ 4   ┆ 16.0      │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ 5   ┆ 6   ┆ 36.0      │
+    #   # └─────┴─────┴───────────┘
+    #
+    # @example Replaced
+    #   df.with_column(Polars.col("a") ** 2)
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌──────┬─────┐
+    #   # │ a    ┆ b   │
+    #   # │ ---  ┆ --- │
+    #   # │ f64  ┆ i64 │
+    #   # ╞══════╪═════╡
+    #   # │ 1.0  ┆ 2   │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 9.0  ┆ 4   │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 25.0 ┆ 6   │
+    #   # └──────┴─────┘
     def with_column(column)
       # Route the change through the lazy API, then materialize it right away.
       pending = lazy.with_column(column)
       pending.collect(no_optimization: true, string_cache: false)
     end
 
-    # def hstack
-    # end
+    # Return a new DataFrame grown horizontally by stacking multiple Series to it.
+    #
+    # @param columns [Object]
+    #   Series to stack.
+    # @param in_place [Boolean]
+    #   Modify in place.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   x = Polars::Series.new("apple", [10, 20, 30])
+    #   df.hstack([x])
+    #   # =>
+    #   # shape: (3, 4)
+    #   # ┌─────┬─────┬─────┬───────┐
+    #   # │ foo ┆ bar ┆ ham ┆ apple │
+    #   # │ --- ┆ --- ┆ --- ┆ ---   │
+    #   # │ i64 ┆ i64 ┆ str ┆ i64   │
+    #   # ╞═════╪═════╪═════╪═══════╡
+    #   # │ 1   ┆ 6   ┆ a   ┆ 10    │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   ┆ b   ┆ 20    │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 3   ┆ 8   ┆ c   ┆ 30    │
+    #   # └─────┴─────┴─────┴───────┘
+    def hstack(columns, in_place: false)
+      if !columns.is_a?(Array)
+        columns = columns.get_columns
+      end
+      if in_place
+        _df.hstack_mut(columns.map(&:_s))
+        self
+      else
+        _from_rbdf(_df.hstack(columns.map(&:_s)))
+      end
+    end
 
-    # def vstack
-    # end
+    # Grow this DataFrame vertically by stacking a DataFrame to it.
+    #
+    # @param df [DataFrame]
+    #   DataFrame to stack.
+    # @param in_place [Boolean]
+    #   Modify in place.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df1 = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2],
+    #       "bar" => [6, 7],
+    #       "ham" => ["a", "b"]
+    #     }
+    #   )
+    #   df2 = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [3, 4],
+    #       "bar" => [8, 9],
+    #       "ham" => ["c", "d"]
+    #     }
+    #   )
+    #   df1.vstack(df2)
+    #   # =>
+    #   # shape: (4, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   ┆ b   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8   ┆ c   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 4   ┆ 9   ┆ d   │
+    #   # └─────┴─────┴─────┘
+    def vstack(df, in_place: false)
+      if in_place
+        _df.vstack_mut(df._df)
+        self
+      else
+        _from_rbdf(_df.vstack(df._df))
+      end
+    end
 
+    # Extend the memory backed by this `DataFrame` with the values from `other`.
     #
+    # Different from `vstack`, which adds the chunks from `other` to the chunks of
+    # this `DataFrame`, `extend` appends the data from `other` to the underlying
+    # memory locations and thus may cause a reallocation.
+    #
+    # If this does not cause a reallocation, the resulting data structure will not
+    # have any extra chunks and thus will yield faster queries.
+    #
+    # Prefer `extend` over `vstack` when you want to do a query after a single append.
+    # For instance during online operations where you add `n` rows and rerun a query.
+    #
+    # Prefer `vstack` over `extend` when you want to append many times before doing a
+    # query. For instance when you read in multiple files and want to store them in a
+    # single `DataFrame`. In the latter case, finish the sequence of `vstack`
+    # operations with a `rechunk`.
+    #
+    # @param other [DataFrame]
+    #   DataFrame to vertically add.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df1 = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
+    #   df2 = Polars::DataFrame.new({"foo" => [10, 20, 30], "bar" => [40, 50, 60]})
+    #   df1.extend(df2)
+    #   # =>
+    #   # shape: (6, 2)
+    #   # ┌─────┬─────┐
+    #   # │ foo ┆ bar │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ 4   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 5   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 6   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 10  ┆ 40  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 20  ┆ 50  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 30  ┆ 60  │
+    #   # └─────┴─────┘
     def extend(other)
       # Delegate to the native frame and hand back the receiver itself.
       tap { _df.extend(other._df) }
     end
 
-    # def drop
-    # end
+    # Remove column(s) from the DataFrame and return the result as a new DataFrame.
+    #
+    # @param columns [Object]
+    #   Column(s) to drop.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6.0, 7.0, 8.0],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.drop("ham")
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ foo ┆ bar │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ f64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ 6.0 │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7.0 │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8.0 │
+    #   # └─────┴─────┘
+    def drop(columns)
+      if columns.is_a?(Array)
+        df = clone
+        columns.each do |n|
+          df._df.drop_in_place(n)
+        end
+        df
+      else
+        _from_rbdf(_df.drop(columns))
+      end
+    end
 
-    # def drop_in_place
-    # end
+    # Drop in place.
+    #
+    # @param name [Object]
+    #   Column to drop.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.drop_in_place("ham")
+    #   # =>
+    #   # shape: (3,)
+    #   # Series: 'ham' [str]
+    #   # [
+    #   #         "a"
+    #   #         "b"
+    #   #         "c"
+    #   # ]
+    def drop_in_place(name)
+      Utils.wrap_s(_df.drop_in_place(name))
+    end
 
-    # def cleared
-    # end
+    # Create an empty copy of the current DataFrame.
+    #
+    # Returns a DataFrame with identical schema but no data.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [nil, 2, 3, 4],
+    #       "b" => [0.5, nil, 2.5, 13],
+    #       "c" => [true, true, false, nil]
+    #     }
+    #   )
+    #   df.cleared
+    #   # =>
+    #   # shape: (0, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ a   ┆ b   ┆ c    │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ i64 ┆ f64 ┆ bool │
+    #   # ╞═════╪═════╪══════╡
+    #   # └─────┴─────┴──────┘
+    def cleared
+      height > 0 ? head(0) : clone
+    end
 
     # clone handled by initialize_copy
 
+    # Get the DataFrame as an Array of Series.
     #
+    # @return [Array]
     def get_columns
       # Wrap every native column returned by the underlying frame.
       _df.get_columns.map do |rb_series|
         Utils.wrap_s(rb_series)
       end
     end
 
+    # Get a single column as Series by name.
+    #
+    # @param name [String]
+    #   Name of the column to retrieve.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
+    #   df.get_column("foo")
+    #   # =>
+    #   # shape: (3,)
+    #   # Series: 'foo' [i64]
+    #   # [
+    #   #         1
+    #   #         2
+    #   #         3
+    #   # ]
     def get_column(name)
       # Same lookup as the index operator, written as an explicit method call.
       self.[](name)
     end
 
     # def fill_null
     # end
 
+    # Fill floating point NaN values by an Expression evaluation.
     #
+    # @param fill_value [Object]
+    #   Value to fill NaN with.
+    #
+    # @return [DataFrame]
+    #
+    # @note
+    #   Note that floating point NaNs (Not a Number) are not missing values!
+    #   To replace missing values, use `fill_null`.
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1.5, 2, Float::NAN, 4],
+    #       "b" => [0.5, 4, Float::NAN, 13]
+    #     }
+    #   )
+    #   df.fill_nan(99)
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌──────┬──────┐
+    #   # │ a    ┆ b    │
+    #   # │ ---  ┆ ---  │
+    #   # │ f64  ┆ f64  │
+    #   # ╞══════╪══════╡
+    #   # │ 1.5  ┆ 0.5  │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 2.0  ┆ 4.0  │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 99.0 ┆ 99.0 │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 4.0  ┆ 13.0 │
+    #   # └──────┴──────┘
     def fill_nan(fill_value)
       # Go through the lazy API and collect immediately.
       pending = lazy.fill_nan(fill_value)
       pending.collect(no_optimization: true)
     end
 
-    # def explode
-    # end
+    # Explode `DataFrame` to long format by exploding a column with Lists.
+    #
+    # @param columns [Object]
+    #   Column of LargeList type.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "letters" => ["a", "a", "b", "c"],
+    #       "numbers" => [[1], [2, 3], [4, 5], [6, 7, 8]]
+    #     }
+    #   )
+    #   df.explode("numbers")
+    #   # =>
+    #   # shape: (8, 2)
+    #   # ┌─────────┬─────────┐
+    #   # │ letters ┆ numbers │
+    #   # │ ---     ┆ ---     │
+    #   # │ str     ┆ i64     │
+    #   # ╞═════════╪═════════╡
+    #   # │ a       ┆ 1       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ a       ┆ 2       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ a       ┆ 3       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ b       ┆ 4       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ b       ┆ 5       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ c       ┆ 6       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ c       ┆ 7       │
+    #   # ├╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ c       ┆ 8       │
+    #   # └─────────┴─────────┘
+    def explode(columns)
+      lazy.explode(columns).collect(no_optimization: true)
+    end
 
     # def pivot
     # end
 
     # def melt
@@ -953,77 +2091,433 @@
     # end
 
     # def partition_by
     # end
 
-    # def shift
-    # end
+    # Shift values by the given period.
+    #
+    # @param periods [Integer]
+    #   Number of places to shift (may be negative).
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.shift(1)
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ null ┆ null ┆ null │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 1    ┆ 6    ┆ a    │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 2    ┆ 7    ┆ b    │
+    #   # └──────┴──────┴──────┘
+    #
+    # @example
+    #   df.shift(-1)
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌──────┬──────┬──────┐
+    #   # │ foo  ┆ bar  ┆ ham  │
+    #   # │ ---  ┆ ---  ┆ ---  │
+    #   # │ i64  ┆ i64  ┆ str  │
+    #   # ╞══════╪══════╪══════╡
+    #   # │ 2    ┆ 7    ┆ b    │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ 3    ┆ 8    ┆ c    │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+    #   # │ null ┆ null ┆ null │
+    #   # └──────┴──────┴──────┘
+    def shift(periods)
+      _from_rbdf(_df.shift(periods))
+    end
 
-    # def shift_and_fill
-    # end
+    # Shift the values by a given period and fill the resulting null values.
+    #
+    # @param periods [Integer]
+    #   Number of places to shift (may be negative).
+    # @param fill_value [Object]
+    #   Fill the resulting null values with this value.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.shift_and_fill(1, 0)
+    #   # =>
+    #   # shape: (3, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 0   ┆ 0   ┆ 0   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   ┆ b   │
+    #   # └─────┴─────┴─────┘
+    def shift_and_fill(periods, fill_value)
+      lazy
+        .shift_and_fill(periods, fill_value)
+        .collect(no_optimization: true, string_cache: false)
+    end
 
+    # Get a mask of all duplicated rows in this DataFrame.
     #
+    # @return [Series]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3, 1],
+    #       "b" => ["x", "y", "z", "x"],
+    #     }
+    #   )
+    #   df.is_duplicated
+    #   # =>
+    #   # shape: (4,)
+    #   # Series: '' [bool]
+    #   # [
+    #   #         true
+    #   #         false
+    #   #         false
+    #   #         true
+    #   # ]
     def is_duplicated
       # The native frame computes the mask; wrap it before returning.
       mask = _df.is_duplicated
       Utils.wrap_s(mask)
     end
 
+    # Get a mask of all unique rows in this DataFrame.
+    #
+    # @return [Series]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3, 1],
+    #       "b" => ["x", "y", "z", "x"]
+    #     }
+    #   )
+    #   df.is_unique
+    #   # =>
+    #   # shape: (4,)
+    #   # Series: '' [bool]
+    #   # [
+    #   #         false
+    #   #         true
+    #   #         true
+    #   #         false
+    #   # ]
     def is_unique
       # The native frame computes the mask; wrap it before returning.
       mask = _df.is_unique
       Utils.wrap_s(mask)
     end
 
+    # Start a lazy query from this point.
+    #
+    # @return [LazyFrame]
     def lazy
       # Ask the native frame for its lazy counterpart, then wrap it.
       native_lazy = _df.lazy
       wrap_ldf(native_lazy)
     end
 
+    # Select columns from this DataFrame.
+    #
+    # @param exprs [Object]
+    #   Column or columns to select.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.select("foo")
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌─────┐
+    #   # │ foo │
+    #   # │ --- │
+    #   # │ i64 │
+    #   # ╞═════╡
+    #   # │ 1   │
+    #   # ├╌╌╌╌╌┤
+    #   # │ 2   │
+    #   # ├╌╌╌╌╌┤
+    #   # │ 3   │
+    #   # └─────┘
+    #
+    # @example
+    #   df.select(["foo", "bar"])
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ foo ┆ bar │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ 6   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8   │
+    #   # └─────┴─────┘
+    #
+    # @example
+    #   df.select(Polars.col("foo") + 1)
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌─────┐
+    #   # │ foo │
+    #   # │ --- │
+    #   # │ i64 │
+    #   # ╞═════╡
+    #   # │ 2   │
+    #   # ├╌╌╌╌╌┤
+    #   # │ 3   │
+    #   # ├╌╌╌╌╌┤
+    #   # │ 4   │
+    #   # └─────┘
+    #
+    # @example
+    #   df.select([Polars.col("foo") + 1, Polars.col("bar") + 1])
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌─────┬─────┐
+    #   # │ foo ┆ bar │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 2   ┆ 7   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 8   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 4   ┆ 9   │
+    #   # └─────┴─────┘
+    #
+    # @example
+    #   df.select(Polars.when(Polars.col("foo") > 2).then(10).otherwise(0))
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌─────────┐
+    #   # │ literal │
+    #   # │ ---     │
+    #   # │ i64     │
+    #   # ╞═════════╡
+    #   # │ 0       │
+    #   # ├╌╌╌╌╌╌╌╌╌┤
+    #   # │ 0       │
+    #   # ├╌╌╌╌╌╌╌╌╌┤
+    #   # │ 10      │
+    #   # └─────────┘
     def select(exprs)
       # Evaluate through the lazy API, then rebuild a frame from the native result.
       collected = lazy.select(exprs).collect(no_optimization: true, string_cache: false)
       _from_rbdf(collected._df)
     end
 
+    # Add or overwrite multiple columns in a DataFrame.
+    #
+    # @param exprs [Array]
+    #   Array of Expressions that evaluate to columns.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3, 4],
+    #       "b" => [0.5, 4, 10, 13],
+    #       "c" => [true, true, false, true]
+    #     }
+    #   )
+    #   df.with_columns(
+    #     [
+    #       (Polars.col("a") ** 2).alias("a^2"),
+    #       (Polars.col("b") / 2).alias("b/2"),
+    #       (Polars.col("c").is_not()).alias("not c")
+    #     ]
+    #   )
+    #   # =>
+    #   # shape: (4, 6)
+    #   # ┌─────┬──────┬───────┬──────┬──────┬───────┐
+    #   # │ a   ┆ b    ┆ c     ┆ a^2  ┆ b/2  ┆ not c │
+    #   # │ --- ┆ ---  ┆ ---   ┆ ---  ┆ ---  ┆ ---   │
+    #   # │ i64 ┆ f64  ┆ bool  ┆ f64  ┆ f64  ┆ bool  │
+    #   # ╞═════╪══════╪═══════╪══════╪══════╪═══════╡
+    #   # │ 1   ┆ 0.5  ┆ true  ┆ 1.0  ┆ 0.25 ┆ false │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ 4.0  ┆ true  ┆ 4.0  ┆ 2.0  ┆ false │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 3   ┆ 10.0 ┆ false ┆ 9.0  ┆ 5.0  ┆ true  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 4   ┆ 13.0 ┆ true  ┆ 16.0 ┆ 6.5  ┆ false │
+    #   # └─────┴──────┴───────┴──────┴──────┴───────┘
     def with_columns(exprs)
       # A lone non-nil argument is wrapped so the lazy call sees nil or an Array.
       exprs = [exprs] unless exprs.nil? || exprs.is_a?(Array)
       pending = lazy.with_columns(exprs)
       pending.collect(no_optimization: true, string_cache: false)
     end
 
+    # Get number of chunks used by the ChunkedArrays of this DataFrame.
+    #
+    # @param strategy ["first", "all"]
+    #   Return the number of chunks of the 'first' column,
+    #   or 'all' columns in this DataFrame.
+    #
+    # @return [Object]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3, 4],
+    #       "b" => [0.5, 4, 10, 13],
+    #       "c" => [true, true, false, true]
+    #     }
+    #   )
+    #   df.n_chunks
+    #   # => 1
+    #   df.n_chunks(strategy: "all")
+    #   # => [1, 1, 1]
     def n_chunks(strategy: "first")
       if strategy == "first"
         # Chunk count as reported by the native frame.
         _df.n_chunks
       elsif strategy == "all"
         # One chunk count per column.
         get_columns.map(&:n_chunks)
       else
         # Interpolate the rejected value so the caller can see what was passed.
         raise ArgumentError, "Strategy: '#{strategy}' not understood. Choose one of {'first', 'all'}"
       end
     end
 
+    # Aggregate the columns of this DataFrame to their maximum value.
+    #
+    # @param axis [Integer]
+    #   Either 0 or 1.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.max
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 3   ┆ 8   ┆ c   │
+    #   # └─────┴─────┴─────┘
     def max(axis: 0)
       case axis
       when 0
         # Column-wise aggregation from the native frame's `max`.
         _from_rbdf(_df.max)
       when 1
         # Axis 1 uses the native frame's `hmax` and wraps the result.
         Utils.wrap_s(_df.hmax)
       else
         raise ArgumentError, "Axis should be 0 or 1."
       end
     end
 
+    # Aggregate the columns of this DataFrame to their minimum value.
+    #
+    # @param axis [Integer]
+    #   Either 0 or 1.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.min
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 6   ┆ a   │
+    #   # └─────┴─────┴─────┘
     def min(axis: 0)
       case axis
       when 0
         # Column-wise aggregation from the native frame's `min`.
         _from_rbdf(_df.min)
       when 1
         # Axis 1 uses the native frame's `hmin` and wraps the result.
         Utils.wrap_s(_df.hmin)
       else
         raise ArgumentError, "Axis should be 0 or 1."
       end
     end
 
+    # Aggregate the columns of this DataFrame to their sum value.
+    #
+    # @param axis [Integer]
+    #   Either 0 or 1.
+    # @param null_strategy ["ignore", "propagate"]
+    #   This argument is only used if axis == 1.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"],
+    #     }
+    #   )
+    #   df.sum
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ foo ┆ bar ┆ ham  │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ i64 ┆ i64 ┆ str  │
+    #   # ╞═════╪═════╪══════╡
+    #   # │ 6   ┆ 21  ┆ null │
+    #   # └─────┴─────┴──────┘
+    #
+    # @example
+    #   df.sum(axis: 1)
+    #   # =>
+    #   # shape: (3,)
+    #   # Series: 'foo' [str]
+    #   # [
+    #   #         "16a"
+    #   #         "27b"
+    #   #         "38c"
+    #   # ]
     def sum(axis: 0, null_strategy: "ignore")
       case axis
       when 0
         _from_rbdf(_df.sum)
       when 1
@@ -1031,10 +2525,37 @@
       else
         raise ArgumentError, "Axis should be 0 or 1."
       end
     end
 
+    # Aggregate the columns of this DataFrame to their mean value.
+    #
+    # @param axis [Integer]
+    #   Either 0 or 1.
+    # @param null_strategy ["ignore", "propagate"]
+    #   This argument is only used if axis == 1.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.mean
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ foo ┆ bar ┆ ham  │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ f64 ┆ f64 ┆ str  │
+    #   # ╞═════╪═════╪══════╡
+    #   # │ 2.0 ┆ 7.0 ┆ null │
+    #   # └─────┴─────┴──────┘
     def mean(axis: 0, null_strategy: "ignore")
       case axis
       when 0
         _from_rbdf(_df.mean)
       when 1
@@ -1042,81 +2563,637 @@
       else
         raise ArgumentError, "Axis should be 0 or 1."
       end
     end
 
+    # Aggregate the columns of this DataFrame to their standard deviation value.
+    #
+    # @param ddof [Integer]
+    #   Degrees of freedom
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.std
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ foo ┆ bar ┆ ham  │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ f64 ┆ f64 ┆ str  │
+    #   # ╞═════╪═════╪══════╡
+    #   # │ 1.0 ┆ 1.0 ┆ null │
+    #   # └─────┴─────┴──────┘
+    #
+    # @example
+    #   df.std(ddof: 0)
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌──────────┬──────────┬──────┐
+    #   # │ foo      ┆ bar      ┆ ham  │
+    #   # │ ---      ┆ ---      ┆ ---  │
+    #   # │ f64      ┆ f64      ┆ str  │
+    #   # ╞══════════╪══════════╪══════╡
+    #   # │ 0.816497 ┆ 0.816497 ┆ null │
+    #   # └──────────┴──────────┴──────┘
     def std(ddof: 1)
       # `ddof` is forwarded positionally to the native frame.
       native = _df.std(ddof)
       _from_rbdf(native)
     end
 
+    # Aggregate the columns of this DataFrame to their variance value.
+    #
+    # @param ddof [Integer]
+    #   Degrees of freedom
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.var
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ foo ┆ bar ┆ ham  │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ f64 ┆ f64 ┆ str  │
+    #   # ╞═════╪═════╪══════╡
+    #   # │ 1.0 ┆ 1.0 ┆ null │
+    #   # └─────┴─────┴──────┘
+    #
+    # @example
+    #   df.var(ddof: 0)
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌──────────┬──────────┬──────┐
+    #   # │ foo      ┆ bar      ┆ ham  │
+    #   # │ ---      ┆ ---      ┆ ---  │
+    #   # │ f64      ┆ f64      ┆ str  │
+    #   # ╞══════════╪══════════╪══════╡
+    #   # │ 0.666667 ┆ 0.666667 ┆ null │
+    #   # └──────────┴──────────┴──────┘
     def var(ddof: 1)
       # `ddof` is forwarded positionally to the native frame.
       native = _df.var(ddof)
       _from_rbdf(native)
     end
 
+    # Aggregate the columns of this DataFrame to their median value.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.median
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ foo ┆ bar ┆ ham  │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ f64 ┆ f64 ┆ str  │
+    #   # ╞═════╪═════╪══════╡
+    #   # │ 2.0 ┆ 7.0 ┆ null │
+    #   # └─────┴─────┴──────┘
     def median
       # The native frame aggregates; the result is wrapped in a new frame.
       native = _df.median
       _from_rbdf(native)
     end
 
-    # def product
-    # end
+    # Aggregate the columns of this DataFrame to their product values.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3],
+    #       "b" => [0.5, 4, 10],
+    #       "c" => [true, true, false]
+    #     }
+    #   )
+    #   df.product
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬──────┬─────┐
+    #   # │ a   ┆ b    ┆ c   │
+    #   # │ --- ┆ ---  ┆ --- │
+    #   # │ i64 ┆ f64  ┆ i64 │
+    #   # ╞═════╪══════╪═════╡
+    #   # │ 6   ┆ 20.0 ┆ 0   │
+    #   # └─────┴──────┴─────┘
+    def product
+      select(Polars.all.product)
+    end
 
-    # def quantile(quantile, interpolation: "nearest")
-    # end
+    # Aggregate the columns of this DataFrame to their quantile value.
+    #
+    # @param quantile [Float]
+    #   Quantile between 0.0 and 1.0.
+    # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
+    #   Interpolation method.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.quantile(0.5, interpolation: "nearest")
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬──────┐
+    #   # │ foo ┆ bar ┆ ham  │
+    #   # │ --- ┆ --- ┆ ---  │
+    #   # │ f64 ┆ f64 ┆ str  │
+    #   # ╞═════╪═════╪══════╡
+    #   # │ 2.0 ┆ 7.0 ┆ null │
+    #   # └─────┴─────┴──────┘
+    def quantile(quantile, interpolation: "nearest")
+      _from_rbdf(_df.quantile(quantile, interpolation))
+    end
 
-    # def to_dummies
-    # end
+    # Get one hot encoded dummy variables.
+    #
+    # @param columns [Object]
+    #   A subset of columns to convert to dummy variables. `nil` means
+    #   "all columns".
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2],
+    #       "bar" => [3, 4],
+    #       "ham" => ["a", "b"]
+    #     }
+    #   )
+    #   df.to_dummies
+    #   # =>
+    #   # shape: (2, 6)
+    #   # ┌───────┬───────┬───────┬───────┬───────┬───────┐
+    #   # │ foo_1 ┆ foo_2 ┆ bar_3 ┆ bar_4 ┆ ham_a ┆ ham_b │
+    #   # │ ---   ┆ ---   ┆ ---   ┆ ---   ┆ ---   ┆ ---   │
+    #   # │ u8    ┆ u8    ┆ u8    ┆ u8    ┆ u8    ┆ u8    │
+    #   # ╞═══════╪═══════╪═══════╪═══════╪═══════╪═══════╡
+    #   # │ 1     ┆ 0     ┆ 1     ┆ 0     ┆ 1     ┆ 0     │
+    #   # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 0     ┆ 1     ┆ 0     ┆ 1     ┆ 0     ┆ 1     │
+    #   # └───────┴───────┴───────┴───────┴───────┴───────┘
+    def to_dummies(columns: nil)
+      if columns.is_a?(String)
+        columns = [columns]
+      end
+      _from_rbdf(_df.to_dummies(columns))
+    end
 
-    # def unique
-    # end
+    # Drop duplicate rows from this DataFrame.
+    #
+    # @param maintain_order [Boolean]
+    #   Keep the same order as the original DataFrame. This requires more work to
+    #   compute.
+    # @param subset [Object]
+    #   Subset to use to compare rows.
+    # @param keep ["first", "last"]
+    #   Which of the duplicate rows to keep (in conjunction with `subset`).
+    #
+    # @return [DataFrame]
+    #
+    # @note
+    #   Note that this fails if there is a column of type `List` in the DataFrame or
+    #   subset.
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 1, 2, 3, 4, 5],
+    #       "b" => [0.5, 0.5, 1.0, 2.0, 3.0, 3.0],
+    #       "c" => [true, true, true, false, true, true]
+    #     }
+    #   )
+    #   df.unique
+    #   # =>
+    #   # shape: (5, 3)
+    #   # ┌─────┬─────┬───────┐
+    #   # │ a   ┆ b   ┆ c     │
+    #   # │ --- ┆ --- ┆ ---   │
+    #   # │ i64 ┆ f64 ┆ bool  │
+    #   # ╞═════╪═════╪═══════╡
+    #   # │ 1   ┆ 0.5 ┆ true  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ 1.0 ┆ true  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 3   ┆ 2.0 ┆ false │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 4   ┆ 3.0 ┆ true  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ 5   ┆ 3.0 ┆ true  │
+    #   # └─────┴─────┴───────┘
+    def unique(maintain_order: true, subset: nil, keep: "first")
+      if !subset.nil? # a nil subset is forwarded untouched
+        if subset.is_a?(String)
+          subset = [subset] # a single name becomes a one-element Array
+        elsif !subset.is_a?(Array)
+          subset = subset.to_a # any other non-Array value is converted with to_a
+        end
+      end
 
-    # def n_unique
-    # end
+      _from_rbdf(_df.unique(maintain_order, subset, keep)) # arguments are passed positionally
+    end
 
+    # Return the number of unique rows, or the number of unique row-subsets.
     #
+    # @param subset [Object]
+    #   One or more columns/expressions that define what to count;
+    #   omit to return the count of unique rows.
+    #
+    # @return [Integer]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 1, 2, 3, 4, 5],
+    #       "b" => [0.5, 0.5, 1.0, 2.0, 3.0, 3.0],
+    #       "c" => [true, true, true, false, true, true]
+    #     }
+    #   )
+    #   df.n_unique
+    #   # => 5
+    #
+    # @example Simple columns subset
+    #   df.n_unique(subset: ["b", "c"])
+    #   # => 4
+    #
+    # @example Expression subset
+    #   df.n_unique(
+    #     subset: [
+    #       (Polars.col("a").floordiv(2)),
+    #       (Polars.col("c") | (Polars.col("b") >= 2))
+    #     ]
+    #   )
+    #   # => 3
+    def n_unique(subset: nil)
+      if subset.is_a?(StringIO)
+        subset = [Polars.col(subset)]
+      elsif subset.is_a?(Expr)
+        subset = [subset]
+      end
+
+      if subset.is_a?(Array) && subset.length == 1
+        expr = Utils.expr_to_lit_or_expr(subset[0], str_to_lit: false)
+      else
+        struct_fields = subset.nil? ? Polars.all : subset
+        expr = Polars.struct(struct_fields)
+      end
+
+      df = lazy.select(expr.n_unique).collect
+      df.is_empty ? 0 : df.row(0)[0]
+    end
+
+    # Rechunk the data in this DataFrame to a contiguous allocation.
+    #
+    # This will make sure all subsequent operations have optimal and predictable
+    # performance.
+    #
+    # @return [DataFrame]
     def rechunk
       rechunked = _df.rechunk
       _from_rbdf(rechunked)
     end
 
+    # Create a new DataFrame that shows the null counts per column.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, nil, 3],
+    #       "bar" => [6, 7, nil],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.null_count
+    #   # =>
+    #   # shape: (1, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ u32 ┆ u32 ┆ u32 │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 1   ┆ 1   ┆ 0   │
+    #   # └─────┴─────┴─────┘
     def null_count
       counts = _df.null_count
       _from_rbdf(counts)
     end
 
-    # def sample
-    # end
+    # Sample from this DataFrame.
+    #
+    # @param n [Integer]
+    #   Number of items to return. Cannot be used with `frac`. Defaults to 1 if
+    #   `frac` is nil.
+    # @param frac [Float]
+    #   Fraction of items to return. Cannot be used with `n`.
+    # @param with_replacement [Boolean]
+    #   Allow values to be sampled more than once.
+    # @param shuffle [Boolean]
+    #   Shuffle the order of sampled data points.
+    # @param seed [Integer]
+    #   Seed for the random number generator. If set to nil (default), a random
+    #   seed is used.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.sample(n: 2, seed: 0)
+    #   # =>
+    #   # shape: (2, 3)
+    #   # ┌─────┬─────┬─────┐
+    #   # │ foo ┆ bar ┆ ham │
+    #   # │ --- ┆ --- ┆ --- │
+    #   # │ i64 ┆ i64 ┆ str │
+    #   # ╞═════╪═════╪═════╡
+    #   # │ 3   ┆ 8   ┆ c   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 2   ┆ 7   ┆ b   │
+    #   # └─────┴─────┴─────┘
+    def sample(
+      n: nil,
+      frac: nil,
+      with_replacement: false,
+      shuffle: false,
+      seed: nil
+    )
+      if !n.nil? && !frac.nil?
+        raise ArgumentError, "cannot specify both `n` and `frac`"
+      end
 
+      if n.nil? && !frac.nil?
+        _from_rbdf(
+          _df.sample_frac(frac, with_replacement, shuffle, seed)
+        )
+      end
+
+      if n.nil?
+        n = 1
+      end
+      _from_rbdf(_df.sample_n(n, with_replacement, shuffle, seed))
+    end
+
     # def fold
     # end
 
-    # def row
-    # end
+    # Get a row as tuple, either by index or by predicate.
+    #
+    # @param index [Object]
+    #   Row index.
+    # @param by_predicate [Object]
+    #   Select the row according to a given expression/predicate.
+    #
+    # @return [Object]
+    #
+    # @note
+    #   The `index` and `by_predicate` params are mutually exclusive. Additionally,
+    #   to ensure clarity, the `by_predicate` parameter must be supplied by keyword.
+    #
+    #   When using `by_predicate` it is an error condition if anything other than
+    #   one row is returned; more than one row raises `TooManyRowsReturned`, and
+    #   zero rows will raise `NoRowsReturned` (both inherit from `RowsException`).
+    #
+    # @example Return the row at the given index
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, 2, 3],
+    #       "bar" => [6, 7, 8],
+    #       "ham" => ["a", "b", "c"]
+    #     }
+    #   )
+    #   df.row(2)
+    #   # => [3, 8, "c"]
+    #
+    # @example Return the row that matches the given predicate
+    #   df.row(by_predicate: Polars.col("ham") == "b")
+    #   # => [2, 7, "b"]
+    def row(index = nil, by_predicate: nil)
+      if !index.nil? && !by_predicate.nil?
+        raise ArgumentError, "Cannot set both 'index' and 'by_predicate'; mutually exclusive"
+      elsif index.is_a?(Expr)
+        raise TypeError, "Expressions should be passed to the 'by_predicate' param"
+      elsif index.is_a?(Integer)
+        _df.row_tuple(index)
+      elsif by_predicate.is_a?(Expr)
+        rows = filter(by_predicate).rows
+        n_rows = rows.length
+        if n_rows > 1
+          raise TooManyRowsReturned, "Predicate #{by_predicate} returned #{n_rows} rows"
+        elsif n_rows == 0
+          raise NoRowsReturned, "Predicate <{by_predicate!s}> returned no rows"
+        end
+        rows[0]
+      else
+        raise ArgumentError, "One of 'index' or 'by_predicate' must be set"
+      end
+    end
 
-    # def rows
-    # end
+    # Convert columnar data to rows as Ruby arrays.
+    #
+    # @return [Array]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 3, 5],
+    #       "b" => [2, 4, 6]
+    #     }
+    #   )
+    #   df.rows
+    #   # => [[1, 2], [3, 4], [5, 6]]
+    def rows
+      _df.row_tuples
+    end
 
-    # def shrink_to_fit
-    # end
+    # Shrink DataFrame memory usage.
+    #
+    # Shrinks to fit the exact capacity needed to hold the data.
+    #
+    # @return [DataFrame]
+    def shrink_to_fit(in_place: false)
+      if in_place
+        _df.shrink_to_fit
+        self
+      else
+        df = clone
+        df._df.shrink_to_fit
+        df
+      end
+    end
 
-    # def take_every
-    # end
+    # Take every nth row in the DataFrame and return as a new DataFrame.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   s = Polars::DataFrame.new({"a" => [1, 2, 3, 4], "b" => [5, 6, 7, 8]})
+    #   s.take_every(2)
+    #   # =>
+    #   # shape: (2, 2)
+    #   # ┌─────┬─────┐
+    #   # │ a   ┆ b   │
+    #   # │ --- ┆ --- │
+    #   # │ i64 ┆ i64 │
+    #   # ╞═════╪═════╡
+    #   # │ 1   ┆ 5   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 3   ┆ 7   │
+    #   # └─────┴─────┘
+    def take_every(n)
+      select(Utils.col("*").take_every(n))
+    end
 
     # def hash_rows
     # end
 
-    # def interpolate
-    # end
+    # Interpolate intermediate values. The interpolation method is linear.
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "foo" => [1, nil, 9, 10],
+    #       "bar" => [6, 7, 9, nil],
+    #       "baz" => [1, nil, nil, 9]
+    #     }
+    #   )
+    #   df.interpolate
+    #   # =>
+    #   # shape: (4, 3)
+    #   # ┌─────┬──────┬─────┐
+    #   # │ foo ┆ bar  ┆ baz │
+    #   # │ --- ┆ ---  ┆ --- │
+    #   # │ i64 ┆ i64  ┆ i64 │
+    #   # ╞═════╪══════╪═════╡
+    #   # │ 1   ┆ 6    ┆ 1   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 5   ┆ 7    ┆ 3   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 9   ┆ 9    ┆ 6   │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
+    #   # │ 10  ┆ null ┆ 9   │
+    #   # └─────┴──────┴─────┘
+    def interpolate
+      select(Utils.col("*").interpolate)
+    end
 
+    # Check if the dataframe is empty.
     #
+    # @return [Boolean]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"foo" => [1, 2, 3], "bar" => [4, 5, 6]})
+    #   df.is_empty
+    #   # => false
+    #   df.filter(Polars.col("foo") > 99).is_empty
+    #   # => true
     def is_empty
       height.zero?
     end
     alias_method :empty?, :is_empty
 
-    # def to_struct(name)
-    # end
+    # Convert a `DataFrame` to a `Series` of type `Struct`.
+    #
+    # @param name [String]
+    #   Name for the struct Series
+    #
+    # @return [Series]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "a" => [1, 2, 3, 4, 5],
+    #       "b" => ["one", "two", "three", "four", "five"]
+    #     }
+    #   )
+    #   df.to_struct("nums")
+    #   # =>
+    #   # shape: (5,)
+    #   # Series: 'nums' [struct[2]]
+    #   # [
+    #   #         {1,"one"}
+    #   #         {2,"two"}
+    #   #         {3,"three"}
+    #   #         {4,"four"}
+    #   #         {5,"five"}
+    #   # ]
+    def to_struct(name)
+      Utils.wrap_s(_df.to_struct(name))
+    end
 
-    # def unnest
-    # end
+    # Decompose a struct into its fields.
+    #
+    # The fields will be inserted into the `DataFrame` on the location of the
+    # `struct` type.
+    #
+    # @param names [Object]
+    #  Names of the struct columns that will be decomposed by its fields
+    #
+    # @return [DataFrame]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "before" => ["foo", "bar"],
+    #       "t_a" => [1, 2],
+    #       "t_b" => ["a", "b"],
+    #       "t_c" => [true, nil],
+    #       "t_d" => [[1, 2], [3]],
+    #       "after" => ["baz", "womp"]
+    #     }
+    #   ).select(["before", Polars.struct(Polars.col("^t_.$")).alias("t_struct"), "after"])
+    #   df.unnest("t_struct")
+    #   # =>
+    #   # shape: (2, 6)
+    #   # ┌────────┬─────┬─────┬──────┬───────────┬───────┐
+    #   # │ before ┆ t_a ┆ t_b ┆ t_c  ┆ t_d       ┆ after │
+    #   # │ ---    ┆ --- ┆ --- ┆ ---  ┆ ---       ┆ ---   │
+    #   # │ str    ┆ i64 ┆ str ┆ bool ┆ list[i64] ┆ str   │
+    #   # ╞════════╪═════╪═════╪══════╪═══════════╪═══════╡
+    #   # │ foo    ┆ 1   ┆ a   ┆ true ┆ [1, 2]    ┆ baz   │
+    #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+    #   # │ bar    ┆ 2   ┆ b   ┆ null ┆ [3]       ┆ womp  │
+    #   # └────────┴─────┴─────┴──────┴───────────┴───────┘
+    def unnest(names)
+      if names.is_a?(String)
+        names = [names]
+      end
+      _from_rbdf(_df.unnest(names))
+    end
 
     private
 
     def initialize_copy(other)
       super
@@ -1125,11 +3202,11 @@
 
     def hash_to_rbdf(data, columns: nil)
       if !columns.nil?
         columns, dtypes = _unpack_columns(columns, lookup_names: data.keys)
 
-        if !data && dtypes
+        if data.empty? && dtypes
           data_series = columns.map { |name| Series.new(name, [], dtype: dtypes[name])._s }
         else
           data_series = data.map { |name, values| Series.new(name, values, dtype: dtypes[name])._s }
         end
         data_series = _handle_columns_arg(data_series, columns: columns)
@@ -1145,11 +3222,11 @@
 
     def _handle_columns_arg(data, columns: nil)
       if columns.nil?
         data
       else
-        if !data
+        if data.empty?
           columns.map { |c| Series.new(c, nil)._s }
         elsif data.length == columns.length
           columns.each_with_index do |c, i|
             # not in-place?
             data[i].rename(c)
@@ -1179,8 +3256,78 @@
       LazyFrame._from_rbldf(ldf)
     end
 
     def _from_rbdf(rb_df)
       self.class._from_rbdf(rb_df)
+    end
+
+    def _comp(other, op)
+      if other.is_a?(DataFrame)
+        _compare_to_other_df(other, op)
+      else
+        _compare_to_non_df(other, op)
+      end
+    end
+
+    def _compare_to_other_df(other, op)
+      if columns != other.columns
+        raise ArgmentError, "DataFrame columns do not match"
+      end
+      if shape != other.shape
+        raise ArgmentError, "DataFrame dimensions do not match"
+      end
+
+      suffix = "__POLARS_CMP_OTHER"
+      other_renamed = other.select(Polars.all.suffix(suffix))
+      combined = Polars.concat([self, other_renamed], how: "horizontal")
+
+      expr = case op
+      when "eq"
+        columns.map { |n| Polars.col(n) == Polars.col("#{n}#{suffix}") }
+      when "neq"
+        columns.map { |n| Polars.col(n) != Polars.col("#{n}#{suffix}") }
+      when "gt"
+        columns.map { |n| Polars.col(n) > Polars.col("#{n}#{suffix}") }
+      when "lt"
+        columns.map { |n| Polars.col(n) < Polars.col("#{n}#{suffix}") }
+      when "gt_eq"
+        columns.map { |n| Polars.col(n) >= Polars.col("#{n}#{suffix}") }
+      when "lt_eq"
+        columns.map { |n| Polars.col(n) <= Polars.col("#{n}#{suffix}") }
+      else
+        raise ArgumentError, "got unexpected comparison operator: #{op}"
+      end
+
+      combined.select(expr)
+    end
+
+    def _compare_to_non_df(other, op)
+      case op
+      when "eq"
+        select(Polars.all == other)
+      when "neq"
+        select(Polars.all != other)
+      when "gt"
+        select(Polars.all > other)
+      when "lt"
+        select(Polars.all < other)
+      when "gt_eq"
+        select(Polars.all >= other)
+      when "lt_eq"
+        select(Polars.all <= other)
+      else
+        raise ArgumentError, "got unexpected comparison operator: #{op}"
+      end
+    end
+
+    def _prepare_other_arg(other)
+      if !other.is_a?(Series)
+        if other.is_a?(Array)
+          raise ArgumentError, "Operation not supported."
+        end
+
+        other = Series.new("", [other])
+      end
+      other
     end
   end
 end