string_expr.rb in polars-df-0.1.3

- old
+ new

@@ -1,77 +1,481 @@
 module Polars
+  # Namespace for string related expressions.
   class StringExpr
+    # @private
     attr_accessor :_rbexpr
 
+    # @private
     def initialize(expr)
       self._rbexpr = expr._rbexpr
     end
 
     # def strptime
     # end
 
+    # Get length of the strings as `:u32` (as number of bytes).
+    #
+    # @return [Expr]
+    #
+    # @note
+    #   The returned lengths are equal to the number of bytes in the UTF8 string. If you
+    #   need the length in terms of the number of characters, use `n_chars` instead.
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"s" => ["Café", nil, "345", "東京"]}).with_columns(
+    #     [
+    #       Polars.col("s").str.lengths.alias("length"),
+    #       Polars.col("s").str.n_chars.alias("nchars")
+    #     ]
+    #   )
+    #   df
+    #   # =>
+    #   # shape: (4, 3)
+    #   # ┌──────┬────────┬────────┐
+    #   # │ s    ┆ length ┆ nchars │
+    #   # │ ---  ┆ ---    ┆ ---    │
+    #   # │ str  ┆ u32    ┆ u32    │
+    #   # ╞══════╪════════╪════════╡
+    #   # │ Café ┆ 5      ┆ 4      │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+    #   # │ null ┆ null   ┆ null   │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+    #   # │ 345  ┆ 3      ┆ 3      │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+    #   # │ 東京 ┆ 6      ┆ 2      │
+    #   # └──────┴────────┴────────┘
     def lengths
       Utils.wrap_expr(_rbexpr.str_lengths)
     end
 
+    # Get length of the strings as `:u32` (as number of chars).
+    #
+    # @return [Expr]
+    #
+    # @note
+    #   If you know that you are working with ASCII text, `lengths` will be
+    #   equivalent, and faster (returns length in terms of the number of bytes).
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"s" => ["Café", nil, "345", "東京"]}).with_columns(
+    #     [
+    #       Polars.col("s").str.lengths.alias("length"),
+    #       Polars.col("s").str.n_chars.alias("nchars")
+    #     ]
+    #   )
+    #   df
+    #   # =>
+    #   # shape: (4, 3)
+    #   # ┌──────┬────────┬────────┐
+    #   # │ s    ┆ length ┆ nchars │
+    #   # │ ---  ┆ ---    ┆ ---    │
+    #   # │ str  ┆ u32    ┆ u32    │
+    #   # ╞══════╪════════╪════════╡
+    #   # │ Café ┆ 5      ┆ 4      │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+    #   # │ null ┆ null   ┆ null   │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+    #   # │ 345  ┆ 3      ┆ 3      │
+    #   # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+    #   # │ 東京 ┆ 6      ┆ 2      │
+    #   # └──────┴────────┴────────┘
     def n_chars
       Utils.wrap_expr(_rbexpr.str_n_chars)
     end
 
+    # Vertically concat the values in the Series to a single string value.
+    #
+    # @param delimiter [String]
+    #   The delimiter to insert between consecutive string values.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
+    #   df.select(Polars.col("foo").str.concat("-"))
+    #   # =>
+    #   # shape: (1, 1)
+    #   # ┌──────────┐
+    #   # │ foo      │
+    #   # │ ---      │
+    #   # │ str      │
+    #   # ╞══════════╡
+    #   # │ 1-null-2 │
+    #   # └──────────┘
     def concat(delimiter = "-")
       Utils.wrap_expr(_rbexpr.str_concat(delimiter))
     end
 
+    # Transform to uppercase variant.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"foo" => ["cat", "dog"]})
+    #   df.select(Polars.col("foo").str.to_uppercase)
+    #   # =>
+    #   # shape: (2, 1)
+    #   # ┌─────┐
+    #   # │ foo │
+    #   # │ --- │
+    #   # │ str │
+    #   # ╞═════╡
+    #   # │ CAT │
+    #   # ├╌╌╌╌╌┤
+    #   # │ DOG │
+    #   # └─────┘
     def to_uppercase
       Utils.wrap_expr(_rbexpr.str_to_uppercase)
     end
 
+    # Transform to lowercase variant.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"foo" => ["CAT", "DOG"]})
+    #   df.select(Polars.col("foo").str.to_lowercase)
+    #   # =>
+    #   # shape: (2, 1)
+    #   # ┌─────┐
+    #   # │ foo │
+    #   # │ --- │
+    #   # │ str │
+    #   # ╞═════╡
+    #   # │ cat │
+    #   # ├╌╌╌╌╌┤
+    #   # │ dog │
+    #   # └─────┘
     def to_lowercase
       Utils.wrap_expr(_rbexpr.str_to_lowercase)
     end
 
+    # Remove leading and trailing whitespace.
+    #
+    # @param matches [String, nil]
+    #   An optional single character that should be trimmed.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"foo" => [" lead", "trail ", " both "]})
+    #   df.select(Polars.col("foo").str.strip)
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌───────┐
+    #   # │ foo   │
+    #   # │ ---   │
+    #   # │ str   │
+    #   # ╞═══════╡
+    #   # │ lead  │
+    #   # ├╌╌╌╌╌╌╌┤
+    #   # │ trail │
+    #   # ├╌╌╌╌╌╌╌┤
+    #   # │ both  │
+    #   # └───────┘
     def strip(matches = nil)
       if !matches.nil? && matches.length > 1
         raise ArgumentError, "matches should contain a single character"
       end
       Utils.wrap_expr(_rbexpr.str_strip(matches))
     end
 
+    # Remove leading whitespace.
+    #
+    # @param matches [String, nil]
+    #   An optional single character that should be trimmed.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"foo" => [" lead", "trail ", " both "]})
+    #   df.select(Polars.col("foo").str.lstrip)
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌────────┐
+    #   # │ foo    │
+    #   # │ ---    │
+    #   # │ str    │
+    #   # ╞════════╡
+    #   # │ lead   │
+    #   # ├╌╌╌╌╌╌╌╌┤
+    #   # │ trail  │
+    #   # ├╌╌╌╌╌╌╌╌┤
+    #   # │ both   │
+    #   # └────────┘
     def lstrip(matches = nil)
       if !matches.nil? && matches.length > 1
         raise ArgumentError, "matches should contain a single character"
       end
       Utils.wrap_expr(_rbexpr.str_lstrip(matches))
     end
 
+    # Remove trailing whitespace.
+    #
+    # @param matches [String, nil]
+    #   An optional single character that should be trimmed.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"foo" => [" lead", "trail ", " both "]})
+    #   df.select(Polars.col("foo").str.rstrip)
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌───────┐
+    #   # │ foo   │
+    #   # │ ---   │
+    #   # │ str   │
+    #   # ╞═══════╡
+    #   # │  lead │
+    #   # ├╌╌╌╌╌╌╌┤
+    #   # │ trail │
+    #   # ├╌╌╌╌╌╌╌┤
+    #   # │  both │
+    #   # └───────┘
     def rstrip(matches = nil)
       if !matches.nil? && matches.length > 1
         raise ArgumentError, "matches should contain a single character"
       end
       Utils.wrap_expr(_rbexpr.str_rstrip(matches))
     end
 
+    # Fills the string with zeroes.
+    #
+    # Return a copy of the string left filled with ASCII '0' digits to make a string
+    # of length width.
+    #
+    # A leading sign prefix ('+'/'-') is handled by inserting the padding after the
+    # sign character rather than before. The original string is returned if width is
+    # less than or equal to `s.length`.
+    #
+    # @param alignment [Integer]
+    #   Fill the value up to this length
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new(
+    #     {
+    #       "num" => [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, nil]
+    #     }
+    #   )
+    #   df.with_column(Polars.col("num").cast(String).str.zfill(5))
+    #   # =>
+    #   # shape: (11, 1)
+    #   # ┌─────────┐
+    #   # │ num     │
+    #   # │ ---     │
+    #   # │ str     │
+    #   # ╞═════════╡
+    #   # │ -0010   │
+    #   # ├╌╌╌╌╌╌╌╌╌┤
+    #   # │ -0001   │
+    #   # ├╌╌╌╌╌╌╌╌╌┤
+    #   # │ 00000   │
+    #   # ├╌╌╌╌╌╌╌╌╌┤
+    #   # │ 00001   │
+    #   # ├╌╌╌╌╌╌╌╌╌┤
+    #   # │ ...     │
+    #   # ├╌╌╌╌╌╌╌╌╌┤
+    #   # │ 10000   │
+    #   # ├╌╌╌╌╌╌╌╌╌┤
+    #   # │ 100000  │
+    #   # ├╌╌╌╌╌╌╌╌╌┤
+    #   # │ 1000000 │
+    #   # ├╌╌╌╌╌╌╌╌╌┤
+    #   # │ null    │
+    #   # └─────────┘
     def zfill(alignment)
       Utils.wrap_expr(_rbexpr.str_zfill(alignment))
     end
 
+    # Return the string left justified in a string of length `width`.
+    #
+    # Padding is done using the specified `fillcha``.
+    # The original string is returned if `width` is less than or equal to
+    # `s.length`.
+    #
+    # @param width [Integer]
+    #   Justify left to this length.
+    # @param fillchar [String]
+    #   Fill with this ASCII character.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"a" => ["cow", "monkey", nil, "hippopotamus"]})
+    #   df.select(Polars.col("a").str.ljust(8, "*"))
+    #   # =>
+    #   # shape: (4, 1)
+    #   # ┌──────────────┐
+    #   # │ a            │
+    #   # │ ---          │
+    #   # │ str          │
+    #   # ╞══════════════╡
+    #   # │ cow*****     │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ monkey**     │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ null         │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ hippopotamus │
+    #   # └──────────────┘
     def ljust(width, fillchar = " ")
       Utils.wrap_expr(_rbexpr.str_ljust(width, fillchar))
     end
 
+    # Return the string right justified in a string of length ``width``.
+    #
+    # Padding is done using the specified `fillchar`.
+    # The original string is returned if `width` is less than or equal to
+    # `s.length`.
+    #
+    # @param width [Integer]
+    #   Justify right to this length.
+    # @param fillchar [String]
+    #   Fill with this ASCII character.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"a" => ["cow", "monkey", nil, "hippopotamus"]})
+    #   df.select(Polars.col("a").str.rjust(8, "*"))
+    #   # =>
+    #   # shape: (4, 1)
+    #   # ┌──────────────┐
+    #   # │ a            │
+    #   # │ ---          │
+    #   # │ str          │
+    #   # ╞══════════════╡
+    #   # │ *****cow     │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ **monkey     │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ null         │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ hippopotamus │
+    #   # └──────────────┘
     def rjust(width, fillchar = " ")
       Utils.wrap_expr(_rbexpr.str_rjust(width, fillchar))
     end
 
+    # Check if string contains a substring that matches a regex.
+    #
+    # @param pattern [String]
+    #   A valid regex pattern.
+    # @param literal [Boolean]
+    #   Treat pattern as a literal string.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"a" => ["Crab", "cat and dog", "rab$bit", nil]})
+    #   df.select(
+    #     [
+    #       Polars.col("a"),
+    #       Polars.col("a").str.contains("cat|bit").alias("regex"),
+    #       Polars.col("a").str.contains("rab$", literal: true).alias("literal")
+    #     ]
+    #   )
+    #   # =>
+    #   # shape: (4, 3)
+    #   # ┌─────────────┬───────┬─────────┐
+    #   # │ a           ┆ regex ┆ literal │
+    #   # │ ---         ┆ ---   ┆ ---     │
+    #   # │ str         ┆ bool  ┆ bool    │
+    #   # ╞═════════════╪═══════╪═════════╡
+    #   # │ Crab        ┆ false ┆ false   │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ cat and dog ┆ true  ┆ false   │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ rab$bit     ┆ true  ┆ true    │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ null        ┆ null  ┆ null    │
+    #   # └─────────────┴───────┴─────────┘
     def contains(pattern, literal: false)
       Utils.wrap_expr(_rbexpr.str_contains(pattern, literal))
     end
 
+    # Check if string values end with a substring.
+    #
+    # @param sub [String]
+    #   Suffix substring.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"fruits" => ["apple", "mango", nil]})
+    #   df.with_column(
+    #     Polars.col("fruits").str.ends_with("go").alias("has_suffix")
+    #   )
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌────────┬────────────┐
+    #   # │ fruits ┆ has_suffix │
+    #   # │ ---    ┆ ---        │
+    #   # │ str    ┆ bool       │
+    #   # ╞════════╪════════════╡
+    #   # │ apple  ┆ false      │
+    #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ mango  ┆ true       │
+    #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ null   ┆ null       │
+    #   # └────────┴────────────┘
+    #
+    # @example Using `ends_with` as a filter condition:
+    #   df.filter(Polars.col("fruits").str.ends_with("go"))
+    #   # =>
+    #   # shape: (1, 1)
+    #   # ┌────────┐
+    #   # │ fruits │
+    #   # │ ---    │
+    #   # │ str    │
+    #   # ╞════════╡
+    #   # │ mango  │
+    #   # └────────┘
     def ends_with(sub)
       Utils.wrap_expr(_rbexpr.str_ends_with(sub))
     end
 
+    # Check if string values start with a substring.
+    #
+    # @param sub [String]
+    #   Prefix substring.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"fruits" => ["apple", "mango", nil]})
+    #   df.with_column(
+    #     Polars.col("fruits").str.starts_with("app").alias("has_prefix")
+    #   )
+    #   # =>
+    #   # shape: (3, 2)
+    #   # ┌────────┬────────────┐
+    #   # │ fruits ┆ has_prefix │
+    #   # │ ---    ┆ ---        │
+    #   # │ str    ┆ bool       │
+    #   # ╞════════╪════════════╡
+    #   # │ apple  ┆ true       │
+    #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ mango  ┆ false      │
+    #   # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ null   ┆ null       │
+    #   # └────────┴────────────┘
+    #
+    # @example Using `starts_with` as a filter condition:
+    #   df.filter(Polars.col("fruits").str.starts_with("app"))
+    #   # =>
+    #   # shape: (1, 1)
+    #   # ┌────────┐
+    #   # │ fruits │
+    #   # │ ---    │
+    #   # │ str    │
+    #   # ╞════════╡
+    #   # │ apple  │
+    #   # └────────┘
     def starts_with(sub)
       Utils.wrap_expr(_rbexpr.str_starts_with(sub))
     end
 
     # def json_path_match
@@ -81,53 +485,310 @@
     # end
 
     # def encode
     # end
 
+    # Extract the target capture group from provided patterns.
+    #
+    # @param pattern [String]
+    #   A valid regex pattern
+    # @param group_index [Integer]
+    #   Index of the targeted capture group.
+    #   Group 0 mean the whole pattern, first group begin at index 1
+    #   Default to the first capture group
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"foo" => ["123 bla 45 asd", "xyz 678 910t"]})
+    #   df.select(
+    #     [
+    #       Polars.col("foo").str.extract('(\d+)')
+    #     ]
+    #   )
+    #   # =>
+    #   # shape: (2, 1)
+    #   # ┌─────┐
+    #   # │ foo │
+    #   # │ --- │
+    #   # │ str │
+    #   # ╞═════╡
+    #   # │ 123 │
+    #   # ├╌╌╌╌╌┤
+    #   # │ 678 │
+    #   # └─────┘
     def extract(pattern, group_index: 1)
       Utils.wrap_expr(_rbexpr.str_extract(pattern, group_index))
     end
 
+    # Extracts all matches for the given regex pattern.
+    #
+    # Extracts each successive non-overlapping regex match in an individual string as
+    # an array.
+    #
+    # @param pattern [String]
+    #   A valid regex pattern
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"foo" => ["123 bla 45 asd", "xyz 678 910t"]})
+    #   df.select(
+    #     [
+    #       Polars.col("foo").str.extract_all('(\d+)').alias("extracted_nrs")
+    #     ]
+    #   )
+    #   # =>
+    #   # shape: (2, 1)
+    #   # ┌────────────────┐
+    #   # │ extracted_nrs  │
+    #   # │ ---            │
+    #   # │ list[str]      │
+    #   # ╞════════════════╡
+    #   # │ ["123", "45"]  │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ ["678", "910"] │
+    #   # └────────────────┘
     def extract_all(pattern)
       Utils.wrap_expr(_rbexpr.str_extract_all(pattern))
     end
 
+    # Count all successive non-overlapping regex matches.
+    #
+    # @param pattern [String]
+    #   A valid regex pattern
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"foo" => ["123 bla 45 asd", "xyz 678 910t"]})
+    #   df.select(
+    #     [
+    #       Polars.col("foo").str.count_match('\d').alias("count_digits")
+    #     ]
+    #   )
+    #   # =>
+    #   # shape: (2, 1)
+    #   # ┌──────────────┐
+    #   # │ count_digits │
+    #   # │ ---          │
+    #   # │ u32          │
+    #   # ╞══════════════╡
+    #   # │ 5            │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ 6            │
+    #   # └──────────────┘
     def count_match(pattern)
       Utils.wrap_expr(_rbexpr.count_match(pattern))
     end
 
+    # Split the string by a substring.
+    #
+    # @param by [String]
+    #   Substring to split by.
+    # @param inclusive [Boolean]
+    #   If true, include the split character/string in the results.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"s" => ["foo bar", "foo-bar", "foo bar baz"]})
+    #   df.select(Polars.col("s").str.split(" "))
+    #   # =>
+    #   # shape: (3, 1)
+    #   # ┌───────────────────────┐
+    #   # │ s                     │
+    #   # │ ---                   │
+    #   # │ list[str]             │
+    #   # ╞═══════════════════════╡
+    #   # │ ["foo", "bar"]        │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ ["foo-bar"]           │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ ["foo", "bar", "baz"] │
+    #   # └───────────────────────┘
     def split(by, inclusive: false)
       if inclusive
         Utils.wrap_expr(_rbexpr.str_split_inclusive(by))
       else
         Utils.wrap_expr(_rbexpr.str_split(by))
       end
     end
 
+    # Split the string by a substring using `n` splits.
+    #
+    # Results in a struct of `n+1` fields.
+    #
+    # If it cannot make `n` splits, the remaining field elements will be null.
+    #
+    # @param by [String]
+    #   Substring to split by.
+    # @param n [Integer]
+    #   Number of splits to make.
+    # @param inclusive [Boolean]
+    #   If true, include the split character/string in the results.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"x" => ["a_1", nil, "c", "d_4"]})
+    #   df.select(
+    #     [
+    #       Polars.col("x").str.split_exact("_", 1).alias("fields")
+    #     ]
+    #   )
+    #   # =>
+    #   # shape: (4, 1)
+    #   # ┌─────────────┐
+    #   # │ fields      │
+    #   # │ ---         │
+    #   # │ struct[2]   │
+    #   # ╞═════════════╡
+    #   # │ {"a","1"}   │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ {null,null} │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ {"c",null}  │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ {"d","4"}   │
+    #   # └─────────────┘
     def split_exact(by, n, inclusive: false)
       if inclusive
         Utils.wrap_expr(_rbexpr.str_split_exact_inclusive(by, n))
       else
         Utils.wrap_expr(_rbexpr.str_split_exact(by, n))
       end
     end
 
+    # Split the string by a substring, restricted to returning at most ``n`` items.
+    #
+    # If the number of possible splits is less than ``n-1``, the remaining field
+    # elements will be null. If the number of possible splits is ``n-1`` or greater,
+    # the last (nth) substring will contain the remainder of the string.
+    #
+    # @param by [String]
+    #   Substring to split by.
+    # @param n [Integer]
+    #   Max number of items to return.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"s" => ["foo bar", nil, "foo-bar", "foo bar baz"]})
+    #   df.select(Polars.col("s").str.splitn(" ", 2).alias("fields"))
+    #   # =>
+    #   # shape: (4, 1)
+    #   # ┌───────────────────┐
+    #   # │ fields            │
+    #   # │ ---               │
+    #   # │ struct[2]         │
+    #   # ╞═══════════════════╡
+    #   # │ {"foo","bar"}     │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ {null,null}       │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ {"foo-bar",null}  │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ {"foo","bar baz"} │
+    #   # └───────────────────┘
     def splitn(by, n)
       Utils.wrap_expr(_rbexpr.str_splitn(by, n))
     end
 
-    def replace(pattern, literal: false)
+    # Replace first matching regex/literal substring with a new string value.
+    #
+    # @param pattern [String]
+    #   Regex pattern.
+    # @param value [String]
+    #   Replacement string.
+    # @param literal [Boolean]
+    #   Treat pattern as a literal string.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"id" => [1, 2], "text" => ["123abc", "abc456"]})
+    #   df.with_column(
+    #     Polars.col("text").str.replace('abc\b', "ABC")
+    #   )
+    #   # =>
+    #   # shape: (2, 2)
+    #   # ┌─────┬────────┐
+    #   # │ id  ┆ text   │
+    #   # │ --- ┆ ---    │
+    #   # │ i64 ┆ str    │
+    #   # ╞═════╪════════╡
+    #   # │ 1   ┆ 123ABC │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ abc456 │
+    #   # └─────┴────────┘
+    def replace(pattern, value, literal: false)
       pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
       value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
       Utils.wrap_expr(_rbexpr.str_replace(pattern._rbexpr, value._rbexpr, literal))
     end
 
-    def replace_all(pattern, literal: false)
+    # Replace all matching regex/literal substrings with a new string value.
+    #
+    # @param pattern [String]
+    #   Regex pattern.
+    # @param value [String]
+    #   Replacement string.
+    # @param literal [Boolean]
+    #   Treat pattern as a literal string.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"id" => [1, 2], "text" => ["abcabc", "123a123"]})
+    #   df.with_column(Polars.col("text").str.replace_all("a", "-"))
+    #   # =>
+    #   # shape: (2, 2)
+    #   # ┌─────┬─────────┐
+    #   # │ id  ┆ text    │
+    #   # │ --- ┆ ---     │
+    #   # │ i64 ┆ str     │
+    #   # ╞═════╪═════════╡
+    #   # │ 1   ┆ -bc-bc  │
+    #   # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+    #   # │ 2   ┆ 123-123 │
+    #   # └─────┴─────────┘
+    def replace_all(pattern, value, literal: false)
       pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
       value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
       Utils.wrap_expr(_rbexpr.str_replace_all(pattern._rbexpr, value._rbexpr, literal))
     end
 
+    # Create subslices of the string values of a Utf8 Series.
+    #
+    # @param offset [Integer]
+    #   Start index. Negative indexing is supported.
+    # @param length [Integer]
+    #   Length of the slice. If set to `nil` (default), the slice is taken to the
+    #   end of the string.
+    #
+    # @return [Expr]
+    #
+    # @example
+    #   df = Polars::DataFrame.new({"s" => ["pear", nil, "papaya", "dragonfruit"]})
+    #   df.with_column(
+    #     Polars.col("s").str.slice(-3).alias("s_sliced")
+    #   )
+    #   # =>
+    #   # shape: (4, 2)
+    #   # ┌─────────────┬──────────┐
+    #   # │ s           ┆ s_sliced │
+    #   # │ ---         ┆ ---      │
+    #   # │ str         ┆ str      │
+    #   # ╞═════════════╪══════════╡
+    #   # │ pear        ┆ ear      │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ null        ┆ null     │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ papaya      ┆ aya      │
+    #   # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+    #   # │ dragonfruit ┆ uit      │
+    #   # └─────────────┴──────────┘
     def slice(offset, length = nil)
       Utils.wrap_expr(_rbexpr.str_slice(offset, length))
     end
   end
 end