lib/polars/string_expr.rb in polars-df-0.1.2 vs lib/polars/string_expr.rb in polars-df-0.1.3
- old
+ new
@@ -1,77 +1,481 @@
module Polars
+ # Namespace for string related expressions.
class StringExpr
+ # @private
attr_accessor :_rbexpr
+ # @private
def initialize(expr)
  # Adopt the handle held by the expression being wrapped.
  handle = expr._rbexpr
  self._rbexpr = handle
end
# def strptime
# end
+ # Get length of the strings as `:u32` (as number of bytes).
+ #
+ # @return [Expr]
+ #
+ # @note
+ # The returned lengths are equal to the number of bytes in the UTF8 string. If you
+ # need the length in terms of the number of characters, use `n_chars` instead.
+ #
+ # @example
+ # df = Polars::DataFrame.new({"s" => ["Café", nil, "345", "東京"]}).with_columns(
+ # [
+ # Polars.col("s").str.lengths.alias("length"),
+ # Polars.col("s").str.n_chars.alias("nchars")
+ # ]
+ # )
+ # df
+ # # =>
+ # # shape: (4, 3)
+ # # ┌──────┬────────┬────────┐
+ # # │ s ┆ length ┆ nchars │
+ # # │ --- ┆ --- ┆ --- │
+ # # │ str ┆ u32 ┆ u32 │
+ # # ╞══════╪════════╪════════╡
+ # # │ Café ┆ 5 ┆ 4 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ # # │ null ┆ null ┆ null │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ # # │ 345 ┆ 3 ┆ 3 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ # # │ 東京 ┆ 6 ┆ 2 │
+ # # └──────┴────────┴────────┘
def lengths
  # Delegate to the underlying expression, then wrap the result.
  byte_lengths = _rbexpr.str_lengths
  Utils.wrap_expr(byte_lengths)
end
+ # Get length of the strings as `:u32` (as number of chars).
+ #
+ # @return [Expr]
+ #
+ # @note
+ # If you know that you are working with ASCII text, `lengths` will be
+ # equivalent, and faster (returns length in terms of the number of bytes).
+ #
+ # @example
+ # df = Polars::DataFrame.new({"s" => ["Café", nil, "345", "東京"]}).with_columns(
+ # [
+ # Polars.col("s").str.lengths.alias("length"),
+ # Polars.col("s").str.n_chars.alias("nchars")
+ # ]
+ # )
+ # df
+ # # =>
+ # # shape: (4, 3)
+ # # ┌──────┬────────┬────────┐
+ # # │ s ┆ length ┆ nchars │
+ # # │ --- ┆ --- ┆ --- │
+ # # │ str ┆ u32 ┆ u32 │
+ # # ╞══════╪════════╪════════╡
+ # # │ Café ┆ 5 ┆ 4 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ # # │ null ┆ null ┆ null │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ # # │ 345 ┆ 3 ┆ 3 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ # # │ 東京 ┆ 6 ┆ 2 │
+ # # └──────┴────────┴────────┘
def n_chars
  # Delegate to the underlying expression, then wrap the result.
  char_counts = _rbexpr.str_n_chars
  Utils.wrap_expr(char_counts)
end
+ # Vertically concat the values in the Series to a single string value.
+ #
+ # @param delimiter [String]
+ # The delimiter to insert between consecutive string values.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
+ # df.select(Polars.col("foo").str.concat("-"))
+ # # =>
+ # # shape: (1, 1)
+ # # ┌──────────┐
+ # # │ foo │
+ # # │ --- │
+ # # │ str │
+ # # ╞══════════╡
+ # # │ 1-null-2 │
+ # # └──────────┘
def concat(delimiter = "-")
  # The delimiter is forwarded untouched to the underlying expression.
  joined = _rbexpr.str_concat(delimiter)
  Utils.wrap_expr(joined)
end
+ # Transform to uppercase variant.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => ["cat", "dog"]})
+ # df.select(Polars.col("foo").str.to_uppercase)
+ # # =>
+ # # shape: (2, 1)
+ # # ┌─────┐
+ # # │ foo │
+ # # │ --- │
+ # # │ str │
+ # # ╞═════╡
+ # # │ CAT │
+ # # ├╌╌╌╌╌┤
+ # # │ DOG │
+ # # └─────┘
def to_uppercase
  # Delegate to the underlying expression, then wrap the result.
  upper = _rbexpr.str_to_uppercase
  Utils.wrap_expr(upper)
end
+ # Transform to lowercase variant.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => ["CAT", "DOG"]})
+ # df.select(Polars.col("foo").str.to_lowercase)
+ # # =>
+ # # shape: (2, 1)
+ # # ┌─────┐
+ # # │ foo │
+ # # │ --- │
+ # # │ str │
+ # # ╞═════╡
+ # # │ cat │
+ # # ├╌╌╌╌╌┤
+ # # │ dog │
+ # # └─────┘
def to_lowercase
  # Delegate to the underlying expression, then wrap the result.
  lower = _rbexpr.str_to_lowercase
  Utils.wrap_expr(lower)
end
+ # Remove leading and trailing whitespace.
+ #
+ # @param matches [String, nil]
+ # An optional single character that should be trimmed.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => [" lead", "trail ", " both "]})
+ # df.select(Polars.col("foo").str.strip)
+ # # =>
+ # # shape: (3, 1)
+ # # ┌───────┐
+ # # │ foo │
+ # # │ --- │
+ # # │ str │
+ # # ╞═══════╡
+ # # │ lead │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ trail │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ both │
+ # # └───────┘
def strip(matches = nil)
  # Guard: nil and strings of at most one character pass; anything longer is rejected.
  raise ArgumentError, "matches should contain a single character" unless matches.nil? || matches.length <= 1

  # `matches` (possibly nil) is forwarded unchanged.
  stripped = _rbexpr.str_strip(matches)
  Utils.wrap_expr(stripped)
end
+ # Remove leading whitespace.
+ #
+ # @param matches [String, nil]
+ # An optional single character that should be trimmed.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => [" lead", "trail ", " both "]})
+ # df.select(Polars.col("foo").str.lstrip)
+ # # =>
+ # # shape: (3, 1)
+ # # ┌────────┐
+ # # │ foo │
+ # # │ --- │
+ # # │ str │
+ # # ╞════════╡
+ # # │ lead │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ trail │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ both │
+ # # └────────┘
def lstrip(matches = nil)
  # Reject anything longer than one character; nil skips the check entirely.
  multi_char = !matches.nil? && matches.length > 1
  raise ArgumentError, "matches should contain a single character" if multi_char

  # `matches` (possibly nil) is forwarded unchanged.
  Utils.wrap_expr(_rbexpr.str_lstrip(matches))
end
+ # Remove trailing whitespace.
+ #
+ # @param matches [String, nil]
+ # An optional single character that should be trimmed.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => [" lead", "trail ", " both "]})
+ # df.select(Polars.col("foo").str.rstrip)
+ # # =>
+ # # shape: (3, 1)
+ # # ┌───────┐
+ # # │ foo │
+ # # │ --- │
+ # # │ str │
+ # # ╞═══════╡
+ # # │ lead │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ trail │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ both │
+ # # └───────┘
def rstrip(matches = nil)
  # Reject anything longer than one character; nil skips the check entirely.
  raise ArgumentError, "matches should contain a single character" if !matches.nil? && matches.length > 1

  # `matches` (possibly nil) is forwarded unchanged.
  trimmed = _rbexpr.str_rstrip(matches)
  Utils.wrap_expr(trimmed)
end
+ # Fills the string with zeroes.
+ #
+ # Return a copy of the string left filled with ASCII '0' digits to make a string
+ # of length `alignment`.
+ #
+ # A leading sign prefix ('+'/'-') is handled by inserting the padding after the
+ # sign character rather than before. The original string is returned if
+ # `alignment` is less than or equal to `s.length`.
+ #
+ # @param alignment [Integer]
+ # Fill the value up to this length
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "num" => [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, nil]
+ # }
+ # )
+ # df.with_column(Polars.col("num").cast(String).str.zfill(5))
+ # # =>
+ # # shape: (11, 1)
+ # # ┌─────────┐
+ # # │ num │
+ # # │ --- │
+ # # │ str │
+ # # ╞═════════╡
+ # # │ -0010 │
+ # # ├╌╌╌╌╌╌╌╌╌┤
+ # # │ -0001 │
+ # # ├╌╌╌╌╌╌╌╌╌┤
+ # # │ 00000 │
+ # # ├╌╌╌╌╌╌╌╌╌┤
+ # # │ 00001 │
+ # # ├╌╌╌╌╌╌╌╌╌┤
+ # # │ ... │
+ # # ├╌╌╌╌╌╌╌╌╌┤
+ # # │ 10000 │
+ # # ├╌╌╌╌╌╌╌╌╌┤
+ # # │ 100000 │
+ # # ├╌╌╌╌╌╌╌╌╌┤
+ # # │ 1000000 │
+ # # ├╌╌╌╌╌╌╌╌╌┤
+ # # │ null │
+ # # └─────────┘
def zfill(alignment)
  # `alignment` is forwarded untouched to the underlying expression.
  padded = _rbexpr.str_zfill(alignment)
  Utils.wrap_expr(padded)
end
+ # Return the string left justified in a string of length `width`.
+ #
+ # Padding is done using the specified `fillchar`.
+ # The original string is returned if `width` is less than or equal to
+ # `s.length`.
+ #
+ # @param width [Integer]
+ # Justify left to this length.
+ # @param fillchar [String]
+ # Fill with this ASCII character.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => ["cow", "monkey", nil, "hippopotamus"]})
+ # df.select(Polars.col("a").str.ljust(8, "*"))
+ # # =>
+ # # shape: (4, 1)
+ # # ┌──────────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ str │
+ # # ╞══════════════╡
+ # # │ cow***** │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ monkey** │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ null │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ hippopotamus │
+ # # └──────────────┘
def ljust(width, fillchar = " ")
  # Both arguments are forwarded in order to the underlying expression.
  justified = _rbexpr.str_ljust(width, fillchar)
  Utils.wrap_expr(justified)
end
+ # Return the string right justified in a string of length `width`.
+ #
+ # Padding is done using the specified `fillchar`.
+ # The original string is returned if `width` is less than or equal to
+ # `s.length`.
+ #
+ # @param width [Integer]
+ # Justify right to this length.
+ # @param fillchar [String]
+ # Fill with this ASCII character.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => ["cow", "monkey", nil, "hippopotamus"]})
+ # df.select(Polars.col("a").str.rjust(8, "*"))
+ # # =>
+ # # shape: (4, 1)
+ # # ┌──────────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ str │
+ # # ╞══════════════╡
+ # # │ *****cow │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ **monkey │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ null │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ hippopotamus │
+ # # └──────────────┘
def rjust(width, fillchar = " ")
  # Both arguments are forwarded in order to the underlying expression.
  justified = _rbexpr.str_rjust(width, fillchar)
  Utils.wrap_expr(justified)
end
+ # Check if string contains a substring that matches a regex.
+ #
+ # @param pattern [String]
+ # A valid regex pattern.
+ # @param literal [Boolean]
+ # Treat pattern as a literal string.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => ["Crab", "cat and dog", "rab$bit", nil]})
+ # df.select(
+ # [
+ # Polars.col("a"),
+ # Polars.col("a").str.contains("cat|bit").alias("regex"),
+ # Polars.col("a").str.contains("rab$", literal: true).alias("literal")
+ # ]
+ # )
+ # # =>
+ # # shape: (4, 3)
+ # # ┌─────────────┬───────┬─────────┐
+ # # │ a ┆ regex ┆ literal │
+ # # │ --- ┆ --- ┆ --- │
+ # # │ str ┆ bool ┆ bool │
+ # # ╞═════════════╪═══════╪═════════╡
+ # # │ Crab ┆ false ┆ false │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+ # # │ cat and dog ┆ true ┆ false │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+ # # │ rab$bit ┆ true ┆ true │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+ # # │ null ┆ null ┆ null │
+ # # └─────────────┴───────┴─────────┘
def contains(pattern, literal: false)
  # The keyword flag is passed positionally to the underlying expression.
  matched = _rbexpr.str_contains(pattern, literal)
  Utils.wrap_expr(matched)
end
+ # Check if string values end with a substring.
+ #
+ # @param sub [String]
+ # Suffix substring.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"fruits" => ["apple", "mango", nil]})
+ # df.with_column(
+ # Polars.col("fruits").str.ends_with("go").alias("has_suffix")
+ # )
+ # # =>
+ # # shape: (3, 2)
+ # # ┌────────┬────────────┐
+ # # │ fruits ┆ has_suffix │
+ # # │ --- ┆ --- │
+ # # │ str ┆ bool │
+ # # ╞════════╪════════════╡
+ # # │ apple ┆ false │
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ mango ┆ true │
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ null ┆ null │
+ # # └────────┴────────────┘
+ #
+ # @example Using `ends_with` as a filter condition:
+ # df.filter(Polars.col("fruits").str.ends_with("go"))
+ # # =>
+ # # shape: (1, 1)
+ # # ┌────────┐
+ # # │ fruits │
+ # # │ --- │
+ # # │ str │
+ # # ╞════════╡
+ # # │ mango │
+ # # └────────┘
def ends_with(sub)
  # `sub` is forwarded untouched to the underlying expression.
  suffix_check = _rbexpr.str_ends_with(sub)
  Utils.wrap_expr(suffix_check)
end
+ # Check if string values start with a substring.
+ #
+ # @param sub [String]
+ # Prefix substring.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"fruits" => ["apple", "mango", nil]})
+ # df.with_column(
+ # Polars.col("fruits").str.starts_with("app").alias("has_prefix")
+ # )
+ # # =>
+ # # shape: (3, 2)
+ # # ┌────────┬────────────┐
+ # # │ fruits ┆ has_prefix │
+ # # │ --- ┆ --- │
+ # # │ str ┆ bool │
+ # # ╞════════╪════════════╡
+ # # │ apple ┆ true │
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ mango ┆ false │
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ null ┆ null │
+ # # └────────┴────────────┘
+ #
+ # @example Using `starts_with` as a filter condition:
+ # df.filter(Polars.col("fruits").str.starts_with("app"))
+ # # =>
+ # # shape: (1, 1)
+ # # ┌────────┐
+ # # │ fruits │
+ # # │ --- │
+ # # │ str │
+ # # ╞════════╡
+ # # │ apple │
+ # # └────────┘
def starts_with(sub)
  # `sub` is forwarded untouched to the underlying expression.
  prefix_check = _rbexpr.str_starts_with(sub)
  Utils.wrap_expr(prefix_check)
end
# def json_path_match
@@ -81,53 +485,310 @@
# end
# def encode
# end
+ # Extract the target capture group from provided patterns.
+ #
+ # @param pattern [String]
+ # A valid regex pattern
+ # @param group_index [Integer]
+ # Index of the targeted capture group.
+ # Group 0 means the whole pattern; the first capture group begins at index 1.
+ # Defaults to the first capture group.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => ["123 bla 45 asd", "xyz 678 910t"]})
+ # df.select(
+ # [
+ # Polars.col("foo").str.extract('(\d+)')
+ # ]
+ # )
+ # # =>
+ # # shape: (2, 1)
+ # # ┌─────┐
+ # # │ foo │
+ # # │ --- │
+ # # │ str │
+ # # ╞═════╡
+ # # │ 123 │
+ # # ├╌╌╌╌╌┤
+ # # │ 678 │
+ # # └─────┘
def extract(pattern, group_index: 1)
  # The keyword group index is passed positionally to the underlying expression.
  captured = _rbexpr.str_extract(pattern, group_index)
  Utils.wrap_expr(captured)
end
+ # Extracts all matches for the given regex pattern.
+ #
+ # Extracts each successive non-overlapping regex match in an individual string as
+ # an array.
+ #
+ # @param pattern [String]
+ # A valid regex pattern
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => ["123 bla 45 asd", "xyz 678 910t"]})
+ # df.select(
+ # [
+ # Polars.col("foo").str.extract_all('(\d+)').alias("extracted_nrs")
+ # ]
+ # )
+ # # =>
+ # # shape: (2, 1)
+ # # ┌────────────────┐
+ # # │ extracted_nrs │
+ # # │ --- │
+ # # │ list[str] │
+ # # ╞════════════════╡
+ # # │ ["123", "45"] │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ ["678", "910"] │
+ # # └────────────────┘
def extract_all(pattern)
  # `pattern` is forwarded untouched to the underlying expression.
  all_matches = _rbexpr.str_extract_all(pattern)
  Utils.wrap_expr(all_matches)
end
+ # Count all successive non-overlapping regex matches.
+ #
+ # @param pattern [String]
+ # A valid regex pattern
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => ["123 bla 45 asd", "xyz 678 910t"]})
+ # df.select(
+ # [
+ # Polars.col("foo").str.count_match('\d').alias("count_digits")
+ # ]
+ # )
+ # # =>
+ # # shape: (2, 1)
+ # # ┌──────────────┐
+ # # │ count_digits │
+ # # │ --- │
+ # # │ u32 │
+ # # ╞══════════════╡
+ # # │ 5 │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 6 │
+ # # └──────────────┘
def count_match(pattern)
  # NOTE(review): unlike its siblings this calls `count_match` without a `str_`
  # prefix on the underlying expression — confirm against the binding.
  match_counts = _rbexpr.count_match(pattern)
  Utils.wrap_expr(match_counts)
end
+ # Split the string by a substring.
+ #
+ # @param by [String]
+ # Substring to split by.
+ # @param inclusive [Boolean]
+ # If true, include the split character/string in the results.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"s" => ["foo bar", "foo-bar", "foo bar baz"]})
+ # df.select(Polars.col("s").str.split(" "))
+ # # =>
+ # # shape: (3, 1)
+ # # ┌───────────────────────┐
+ # # │ s │
+ # # │ --- │
+ # # │ list[str] │
+ # # ╞═══════════════════════╡
+ # # │ ["foo", "bar"] │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ ["foo-bar"] │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ ["foo", "bar", "baz"] │
+ # # └───────────────────────┘
def split(by, inclusive: false)
  # Choose the splitter variant first, then wrap the result once.
  pieces = inclusive ? _rbexpr.str_split_inclusive(by) : _rbexpr.str_split(by)
  Utils.wrap_expr(pieces)
end
+ # Split the string by a substring using `n` splits.
+ #
+ # Results in a struct of `n+1` fields.
+ #
+ # If it cannot make `n` splits, the remaining field elements will be null.
+ #
+ # @param by [String]
+ # Substring to split by.
+ # @param n [Integer]
+ # Number of splits to make.
+ # @param inclusive [Boolean]
+ # If true, include the split character/string in the results.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"x" => ["a_1", nil, "c", "d_4"]})
+ # df.select(
+ # [
+ # Polars.col("x").str.split_exact("_", 1).alias("fields")
+ # ]
+ # )
+ # # =>
+ # # shape: (4, 1)
+ # # ┌─────────────┐
+ # # │ fields │
+ # # │ --- │
+ # # │ struct[2] │
+ # # ╞═════════════╡
+ # # │ {"a","1"} │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ {null,null} │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ {"c",null} │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ {"d","4"} │
+ # # └─────────────┘
def split_exact(by, n, inclusive: false)
  # Choose the splitter variant first, then wrap the result once.
  fields = inclusive ? _rbexpr.str_split_exact_inclusive(by, n) : _rbexpr.str_split_exact(by, n)
  Utils.wrap_expr(fields)
end
+ # Split the string by a substring, restricted to returning at most `n` items.
+ #
+ # If the number of possible splits is less than `n-1`, the remaining field
+ # elements will be null. If the number of possible splits is `n-1` or greater,
+ # the last (nth) substring will contain the remainder of the string.
+ #
+ # @param by [String]
+ # Substring to split by.
+ # @param n [Integer]
+ # Max number of items to return.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"s" => ["foo bar", nil, "foo-bar", "foo bar baz"]})
+ # df.select(Polars.col("s").str.splitn(" ", 2).alias("fields"))
+ # # =>
+ # # shape: (4, 1)
+ # # ┌───────────────────┐
+ # # │ fields │
+ # # │ --- │
+ # # │ struct[2] │
+ # # ╞═══════════════════╡
+ # # │ {"foo","bar"} │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ {null,null} │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ {"foo-bar",null} │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ {"foo","bar baz"} │
+ # # └───────────────────┘
def splitn(by, n)
  # Both arguments are forwarded in order to the underlying expression.
  fields = _rbexpr.str_splitn(by, n)
  Utils.wrap_expr(fields)
end
- def replace(pattern, literal: false)
+ # Replace first matching regex/literal substring with a new string value.
+ #
+ # @param pattern [String]
+ # Regex pattern.
+ # @param value [String]
+ # Replacement string.
+ # @param literal [Boolean]
+ # Treat pattern as a literal string.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"id" => [1, 2], "text" => ["123abc", "abc456"]})
+ # df.with_column(
+ # Polars.col("text").str.replace('abc\b', "ABC")
+ # )
+ # # =>
+ # # shape: (2, 2)
+ # # ┌─────┬────────┐
+ # # │ id ┆ text │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ str │
+ # # ╞═════╪════════╡
+ # # │ 1 ┆ 123ABC │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌┤
+ # # │ 2 ┆ abc456 │
+ # # └─────┴────────┘
+ def replace(pattern, value, literal: false)
# Pass both arguments through Utils.expr_to_lit_or_expr (with str_to_lit: true);
# the results must respond to `_rbexpr`, which is what gets handed on below.
pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
# Delegate to `str_replace` on the underlying expression; `literal` is passed
# positionally, and the result is wrapped via Utils.wrap_expr.
Utils.wrap_expr(_rbexpr.str_replace(pattern._rbexpr, value._rbexpr, literal))
end
- def replace_all(pattern, literal: false)
+ # Replace all matching regex/literal substrings with a new string value.
+ #
+ # @param pattern [String]
+ # Regex pattern.
+ # @param value [String]
+ # Replacement string.
+ # @param literal [Boolean]
+ # Treat pattern as a literal string.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"id" => [1, 2], "text" => ["abcabc", "123a123"]})
+ # df.with_column(Polars.col("text").str.replace_all("a", "-"))
+ # # =>
+ # # shape: (2, 2)
+ # # ┌─────┬─────────┐
+ # # │ id ┆ text │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ str │
+ # # ╞═════╪═════════╡
+ # # │ 1 ┆ -bc-bc │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+ # # │ 2 ┆ 123-123 │
+ # # └─────┴─────────┘
+ def replace_all(pattern, value, literal: false)
# Pass both arguments through Utils.expr_to_lit_or_expr (with str_to_lit: true);
# the results must respond to `_rbexpr`, which is what gets handed on below.
pattern = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
value = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
# Delegate to `str_replace_all` on the underlying expression; `literal` is passed
# positionally, and the result is wrapped via Utils.wrap_expr.
Utils.wrap_expr(_rbexpr.str_replace_all(pattern._rbexpr, value._rbexpr, literal))
end
+ # Create subslices of the string values of a Utf8 Series.
+ #
+ # @param offset [Integer]
+ # Start index. Negative indexing is supported.
+ # @param length [Integer, nil]
+ # Length of the slice. If set to `nil` (default), the slice is taken to the
+ # end of the string.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"s" => ["pear", nil, "papaya", "dragonfruit"]})
+ # df.with_column(
+ # Polars.col("s").str.slice(-3).alias("s_sliced")
+ # )
+ # # =>
+ # # shape: (4, 2)
+ # # ┌─────────────┬──────────┐
+ # # │ s ┆ s_sliced │
+ # # │ --- ┆ --- │
+ # # │ str ┆ str │
+ # # ╞═════════════╪══════════╡
+ # # │ pear ┆ ear │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ null ┆ null │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ papaya ┆ aya │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ dragonfruit ┆ uit │
+ # # └─────────────┴──────────┘
def slice(offset, length = nil)
  # `length` may be nil; it is forwarded as-is along with `offset`.
  sliced = _rbexpr.str_slice(offset, length)
  Utils.wrap_expr(sliced)
end
end
end