module Polars
  # Namespace for string related expressions.
  class StringExpr
    # @private
    attr_accessor :_rbexpr

    # @private
    def initialize(expr)
      # Reuse the `_rbexpr` held by the given expression for this namespace.
      inner = expr._rbexpr
      self._rbexpr = inner
    end

    # Convert a Utf8 column into a Date column.
    #
    # @param format [String]
    #   Format to use for conversion. Refer to the
    #   [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
    #   for the full specification. Example: `"%Y-%m-%d"`.
    #   If set to nil (default), the format is inferred from the data.
    # @param strict [Boolean]
    #   Raise an error if any conversion fails.
    # @param exact [Boolean]
    #   Require an exact format match. If false, allow the format to match anywhere
    #   in the target string.
    # @param cache [Boolean]
    #   Use a cache of unique, converted dates to apply the conversion.
    #
    # @return [Expr]
    #
    # @example
    #   s = Polars::Series.new(["2020/01/01", "2020/02/01", "2020/03/01"])
    #   s.str.to_date
    #   # =>
    #   # shape: (3,)
    #   # Series: '' [date]
    #   # [
    #   #         2020-01-01
    #   #         2020-02-01
    #   #         2020-03-01
    #   # ]
    def to_date(format = nil, strict: true, exact: true, cache: true)
      # Run the shared format check before building the expression.
      _validate_format_argument(format)

      converted = _rbexpr.str_to_date(format, strict, exact, cache)
      Utils.wrap_expr(converted)
    end

    # Convert a Utf8 column into a Datetime column.
    #
    # @param format [String]
    #   Format to use for conversion. Refer to the
    #   [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
    #   for the full specification. Example: `"%Y-%m-%d %H:%M:%S"`.
    #   If set to nil (default), the format is inferred from the data.
    # @param time_unit ["us", "ns", "ms"]
    #   Unit of time for the resulting Datetime column. If set to nil (default),
    #   the time unit is inferred from the format string if given, eg:
    #   `"%F %T%.3f"` => `Datetime("ms")`. If no fractional second component is
    #   found, the default is `"us"`.
    # @param time_zone [String]
    #   Time zone for the resulting Datetime column.
    # @param strict [Boolean]
    #   Raise an error if any conversion fails.
    # @param exact [Boolean]
    #   Require an exact format match. If false, allow the format to match anywhere
    #   in the target string.
    # @param cache [Boolean]
    #   Use a cache of unique, converted datetimes to apply the conversion.
    #
    # @return [Expr]
    #
    # @example
    #   s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
    #   s.str.to_datetime("%Y-%m-%d %H:%M%#z")
    #   # =>
    #   # shape: (2,)
    #   # Series: '' [datetime[μs, UTC]]
    #   # [
    #   #         2020-01-01 01:00:00 UTC
    #   #         2020-01-01 02:00:00 UTC
    #   # ]
    def to_datetime(
      format = nil,
      time_unit: nil,
      time_zone: nil,
      strict: true,
      exact: true,
      cache: true,
      use_earliest: nil,
      ambiguous: "raise"
    )
      _validate_format_argument(format)

      # Let the helper map `use_earliest` onto `ambiguous`, then make sure the
      # result is an expression so its `_rbexpr` can be passed along.
      resolved = Utils.rename_use_earliest_to_ambiguous(use_earliest, ambiguous)
      resolved = Polars.lit(resolved) unless resolved.is_a?(Expr)

      converted = _rbexpr.str_to_datetime(
        format, time_unit, time_zone, strict, exact, cache, resolved._rbexpr
      )
      Utils.wrap_expr(converted)
    end

    # Convert a Utf8 column into a Time column.
    #
    # @param format [String]
    #   Format to use for conversion. Refer to the
    #   [chrono crate documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
    #   for the full specification. Example: `"%H:%M:%S"`.
    #   If set to nil (default), the format is inferred from the data.
    # @param strict [Boolean]
    #   Raise an error if any conversion fails.
    # @param cache [Boolean]
    #   Use a cache of unique, converted times to apply the conversion.
    #
    # @return [Expr]
    #
    # @example
    #   s = Polars::Series.new(["01:00", "02:00", "03:00"])
    #   s.str.to_time("%H:%M")
    #   # =>
    #   # shape: (3,)
    #   # Series: '' [time]
    #   # [
    #   #         01:00:00
    #   #         02:00:00
    #   #         03:00:00
    #   # ]
    def to_time(format = nil, strict: true, cache: true)
      # Run the shared format check before building the expression.
      _validate_format_argument(format)

      converted = _rbexpr.str_to_time(format, strict, cache)
      Utils.wrap_expr(converted)
    end

    # Parse a Utf8 expression to a Date/Datetime/Time type.
    #
    # @param dtype [Object]
    #   The data type to convert into. Can be either Date, Datetime, or Time.
    # @param format [String]
    #   Format to use, refer to the
    #   [chrono strftime documentation](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)
    #   for specification. Example: `"%y-%m-%d"`.
    # @param strict [Boolean]
    #   Raise an error if any conversion fails.
    # @param exact [Boolean]
    #   - If true, require an exact format match.
    #   - If false, allow the format to match anywhere in the target string.
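    # @param cache [Boolean]
    #   Use a cache of unique, converted values to apply the conversion.
    # @param utc [Boolean]
    #   Parse timezone aware datetimes as UTC. This may be useful if you have data
    #   with mixed offsets. Note: this argument is accepted but is not currently
    #   forwarded to any of the underlying conversions.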
    #
    # @return [Expr]
    #
    # @note
    #   When parsing a Datetime the column precision will be inferred from
    #   the format string, if given, eg: "%F %T%.3f" => Datetime("ms"). If
    #   no fractional second component is found then the default is "us".
    #
    # @example Dealing with a consistent format:
    #   s = Polars::Series.new(["2020-01-01 01:00Z", "2020-01-01 02:00Z"])
    #   s.str.strptime(Polars::Datetime, "%Y-%m-%d %H:%M%#z")
    #   # =>
    #   # shape: (2,)
    #   # Series: '' [datetime[μs, UTC]]
    #   # [
    #   #         2020-01-01 01:00:00 UTC
    #   #         2020-01-01 02:00:00 UTC
    #   # ]
    #
    # @example Dealing with different formats.
    #   s = Polars::Series.new(
    #     "date",
    #     [
    #       "2021-04-22",
    #       "2022-01-04 00:00:00",
    #       "01/31/22",
    #       "Sun Jul  8 00:34:60 2001",
    #     ]
    #   )
    #   s.to_frame.select(
    #     Polars.coalesce(
    #       Polars.col("date").str.strptime(Polars::Date, "%F", strict: false),
    #       Polars.col("date").str.strptime(Polars::Date, "%F %T", strict: false),
    #       Polars.col("date").str.strptime(Polars::Date, "%D", strict: false),
    #       Polars.col("date").str.strptime(Polars::Date, "%c", strict: false)
    #     )
    #   ).to_series
    #   # =>
    #   # shape: (4,)
    #   # Series: 'date' [date]
    #   # [
    #   #         2021-04-22
    #   #         2022-01-04
    #   #         2022-01-31
    #   #         2001-07-08
    #   # ]
    def strptime(dtype, format = nil, strict: true, exact: true, cache: true, utc: false)
      _validate_format_argument(format)

      # Dispatch to the dedicated converter for the requested temporal type.
      # (`utc` is accepted but not used anywhere in this method.)
      return to_date(format, strict: strict, exact: exact, cache: cache) if dtype == Date

      if dtype == Datetime || dtype.is_a?(Datetime)
        # A bare `Datetime` class is instantiated so its unit/zone can be read.
        datetime_type = dtype == Datetime ? Datetime.new : dtype
        return to_datetime(
          format,
          time_unit: datetime_type.time_unit,
          time_zone: datetime_type.time_zone,
          strict: strict,
          exact: exact,
          cache: cache
        )
      end

      return to_time(format, strict: strict, cache: cache) if dtype == Time

      raise ArgumentError, "dtype should be of type {Date, Datetime, Time}"
    end

    # Get length of the strings as `:u32` (as number of bytes).
    #
    # @return [Expr]
    #
    # @note
    #   The returned lengths are equal to the number of bytes in the UTF8 string. If you
    #   need the length in terms of the number of characters, use `n_chars` instead.
    #
    # @example
    #   df = Polars::DataFrame.new({"s" => ["Café", nil, "345", "東京"]}).with_columns(
    #     [
    #       Polars.col("s").str.lengths.alias("length"),
    #       Polars.col("s").str.n_chars.alias("nchars")
    #     ]
    #   )
    #   df
    #   # =>
    #   # shape: (4, 3)
    #   # ┌──────┬────────┬────────┐
    #   # │ s    ┆ length ┆ nchars │
    #   # │ ---  ┆ ---    ┆ ---    │
    #   # │ str  ┆ u32    ┆ u32    │
    #   # ╞══════╪════════╪════════╡
    #   # │ Café ┆ 5      ┆ 4      │
    #   # │ null ┆ null   ┆ null   │
    #   # │ 345  ┆ 3      ┆ 3      │
    #   # │ 東京 ┆ 6      ┆ 2      │
    #   # └──────┴────────┴────────┘
    def lengths
      # Byte-based length, as computed by the underlying expression.
      byte_lengths = _rbexpr.str_len_bytes
      Utils.wrap_expr(byte_lengths)
    end

    # Get length of the strings as `:u32` (as number of chars).
    #
    # @return [Expr]
    #
    # @note
    #   If you know that you are working with ASCII text, `lengths` will be
    #   equivalent, and faster (returns length in terms of the number of bytes).
    #
    # @example
    #   df = Polars::DataFrame.new({"s" => ["Café", nil, "345", "東京"]}).with_columns(
    #     [
    #       Polars.col("s").str.lengths.alias("length"),
    #       Polars.col("s").str.n_chars.alias("nchars")
    #     ]
    #   )
    #   df
    #   # =>
    #   # shape: (4, 3)
    #   # ┌──────┬────────┬────────┐
    #   # │ s    ┆ length ┆ nchars │
    #   # │ ---  ┆ ---    ┆ ---    │
    #   # │ str  ┆ u32    ┆ u32    │
    #   # ╞══════╪════════╪════════╡
    #   # │ Café ┆ 5      ┆ 4      │
    #   # │ null ┆ null   ┆ null   │
    #   # │ 345  ┆ 3      ┆ 3      │
    #   # │ 東京 ┆ 6      ┆ 2      │
    #   # └──────┴────────┴────────┘
    def n_chars
      # Character-based length, as computed by the underlying expression.
      char_lengths = _rbexpr.str_len_chars
      Utils.wrap_expr(char_lengths)
    end

    # Vertically concat the values in the Series to a single string value.
    #
    # @param delimiter [String]
    #   The delimiter to insert between consecutive string values.
    # @param ignore_nulls [Boolean]
    #   Ignore null values (default).
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
    #   df.select(Polars.col("foo").str.concat("-"))
    #   # =>
    #   # shape: (1, 1)
    #   # ┌─────┐
    #   # │ foo │
    #   # │ --- │
    #   # │ str │
    #   # ╞═════╡
    #   # │ 1-2 │
    #   # └─────┘
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => [1, nil, 2]})
    #   df.select(Polars.col("foo").str.concat("-", ignore_nulls: false))
    #   # =>
    #   # shape: (1, 1)
    #   # ┌──────┐
    #   # │ foo  │
    #   # │ ---  │
    #   # │ str  │
    #   # ╞══════╡
    #   # │ null │
    #   # └──────┘
    def concat(delimiter = "-", ignore_nulls: true)
      # Both options are passed positionally to the underlying expression.
      joined = _rbexpr.str_concat(delimiter, ignore_nulls)
      Utils.wrap_expr(joined)
    end

    # Transform to uppercase variant.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => ["cat", "dog"]})
    #   df.select(Polars.col("foo").str.to_uppercase)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ foo │
    #   # │ --- │
    #   # │ str │
    #   # ╞═════╡
    #   # │ CAT │
    #   # │ DOG │
    #   # └─────┘
    def to_uppercase
      # Delegate the case conversion to the underlying expression.
      uppercased = _rbexpr.str_to_uppercase
      Utils.wrap_expr(uppercased)
    end

    # Transform to lowercase variant.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => ["CAT", "DOG"]})
    #   df.select(Polars.col("foo").str.to_lowercase)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ foo │
    #   # │ --- │
    #   # │ str │
    #   # ╞═════╡
    #   # │ cat │
    #   # │ dog │
    #   # └─────┘
    def to_lowercase
      # Delegate the case conversion to the underlying expression.
      lowercased = _rbexpr.str_to_lowercase
      Utils.wrap_expr(lowercased)
    end

    # Remove leading and trailing whitespace.
    #
    # @param characters [String, nil]
    #   An optional single character that should be trimmed.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => [" lead", "trail ", " both "]})
    #   df.select(Polars.col("foo").str.strip)
    #   # =>
    #   # shape: (3, 1)
    #   # ┌───────┐
    #   # │ foo   │
    #   # │ ---   │
    #   # │ str   │
    #   # ╞═══════╡
    #   # │ lead  │
    #   # │ trail │
    #   # │ both  │
    #   # └───────┘
    def strip_chars(characters = nil)
      # Normalize the argument through the shared parser (`str_as_lit: true`).
      chars_expr = Utils.parse_as_expression(characters, str_as_lit: true)
      stripped = _rbexpr.str_strip_chars(chars_expr)
      Utils.wrap_expr(stripped)
    end
    alias_method :strip, :strip_chars

    # Remove leading whitespace.
    #
    # @param characters [String, nil]
    #   An optional single character that should be trimmed.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => [" lead", "trail ", " both "]})
    #   df.select(Polars.col("foo").str.lstrip)
    #   # =>
    #   # shape: (3, 1)
    #   # ┌────────┐
    #   # │ foo    │
    #   # │ ---    │
    #   # │ str    │
    #   # ╞════════╡
    #   # │ lead   │
    #   # │ trail  │
    #   # │ both   │
    #   # └────────┘
    def strip_chars_start(characters = nil)
      # Normalize the argument through the shared parser (`str_as_lit: true`).
      chars_expr = Utils.parse_as_expression(characters, str_as_lit: true)
      stripped = _rbexpr.str_strip_chars_start(chars_expr)
      Utils.wrap_expr(stripped)
    end
    alias_method :lstrip, :strip_chars_start

    # Remove trailing whitespace.
    #
    # @param characters [String, nil]
    #   An optional single character that should be trimmed.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => [" lead", "trail ", " both "]})
    #   df.select(Polars.col("foo").str.rstrip)
    #   # =>
    #   # shape: (3, 1)
    #   # ┌───────┐
    #   # │ foo   │
    #   # │ ---   │
    #   # │ str   │
    #   # ╞═══════╡
    #   # │  lead │
    #   # │ trail │
    #   # │  both │
    #   # └───────┘
    def strip_chars_end(characters = nil)
      # Normalize the argument through the shared parser (`str_as_lit: true`).
      chars_expr = Utils.parse_as_expression(characters, str_as_lit: true)
      stripped = _rbexpr.str_strip_chars_end(chars_expr)
      Utils.wrap_expr(stripped)
    end
    alias_method :rstrip, :strip_chars_end

    # Fills the string with zeroes.
    #
    # Return a copy of the string left filled with ASCII '0' digits to make a string
    # of length width.
    #
    # A leading sign prefix ('+'/'-') is handled by inserting the padding after the
    # sign character rather than before. The original string is returned if width is
    # less than or equal to `s.length`.
    #
    # @param alignment [Integer]
    #   Fill the value up to this length
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "num" => [-10, -1, 0, 1, 10, 100, 1000, 10000, 100000, 1000000, nil]
    #     }
    #   )
    #   df.with_column(Polars.col("num").cast(String).str.zfill(5))
    #   # =>
    #   # shape: (11, 1)
    #   # ┌─────────┐
    #   # │ num     │
    #   # │ ---     │
    #   # │ str     │
    #   # ╞═════════╡
    #   # │ -0010   │
    #   # │ -0001   │
    #   # │ 00000   │
    #   # │ 00001   │
    #   # │ …       │
    #   # │ 10000   │
    #   # │ 100000  │
    #   # │ 1000000 │
    #   # │ null    │
    #   # └─────────┘
    def zfill(alignment)
      # `alignment` is handed to the underlying expression unchanged.
      padded = _rbexpr.str_zfill(alignment)
      Utils.wrap_expr(padded)
    end

    # Return the string left justified in a string of length `length`.
    #
    # Padding is done using the specified `fillchar`.
    # The original string is returned if `length` is less than or equal to
    # `s.length`.
    #
    # @param length [Integer]
    #   Justify left to this length.
    # @param fillchar [String]
    #   Fill with this ASCII character.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"a" => ["cow", "monkey", nil, "hippopotamus"]})
    #   df.select(Polars.col("a").str.ljust(8, "*"))
    #   # =>
    #   # shape: (4, 1)
    #   # ┌──────────────┐
    #   # │ a            │
    #   # │ ---          │
    #   # │ str          │
    #   # ╞══════════════╡
    #   # │ cow*****     │
    #   # │ monkey**     │
    #   # │ null         │
    #   # │ hippopotamus │
    #   # └──────────────┘
    def ljust(length, fillchar = " ")
      # Left-justifying is expressed as padding at the end of the string.
      padded = _rbexpr.str_pad_end(length, fillchar)
      Utils.wrap_expr(padded)
    end
    alias_method :pad_end, :ljust

    # Return the string right justified in a string of length `length`.
    #
    # Padding is done using the specified `fillchar`.
    # The original string is returned if `length` is less than or equal to
    # `s.length`.
    #
    # @param length [Integer]
    #   Justify right to this length.
    # @param fillchar [String]
    #   Fill with this ASCII character.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"a" => ["cow", "monkey", nil, "hippopotamus"]})
    #   df.select(Polars.col("a").str.rjust(8, "*"))
    #   # =>
    #   # shape: (4, 1)
    #   # ┌──────────────┐
    #   # │ a            │
    #   # │ ---          │
    #   # │ str          │
    #   # ╞══════════════╡
    #   # │ *****cow     │
    #   # │ **monkey     │
    #   # │ null         │
    #   # │ hippopotamus │
    #   # └──────────────┘
    def rjust(length, fillchar = " ")
      # Right-justifying is expressed as padding at the start of the string.
      padded = _rbexpr.str_pad_start(length, fillchar)
      Utils.wrap_expr(padded)
    end
    alias_method :pad_start, :rjust

    # Check if string contains a substring that matches a regex.
    #
    # @param pattern [String]
    #   A valid regex pattern.
    # @param literal [Boolean]
    #   Treat pattern as a literal string.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"a" => ["Crab", "cat and dog", "rab$bit", nil]})
    #   df.select(
    #     [
    #       Polars.col("a"),
    #       Polars.col("a").str.contains("cat|bit").alias("regex"),
    #       Polars.col("a").str.contains("rab$", literal: true).alias("literal")
    #     ]
    #   )
    #   # =>
    #   # shape: (4, 3)
    #   # ┌─────────────┬───────┬─────────┐
    #   # │ a           ┆ regex ┆ literal │
    #   # │ ---         ┆ ---   ┆ ---     │
    #   # │ str         ┆ bool  ┆ bool    │
    #   # ╞═════════════╪═══════╪═════════╡
    #   # │ Crab        ┆ false ┆ false   │
    #   # │ cat and dog ┆ true  ┆ false   │
    #   # │ rab$bit     ┆ true  ┆ true    │
    #   # │ null        ┆ null  ┆ null    │
    #   # └─────────────┴───────┴─────────┘
    def contains(pattern, literal: false, strict: true)
      # Normalize the pattern via the helper (`str_to_lit: true`) and pass only
      # its `_rbexpr` on to the underlying expression.
      pattern_expr = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
      matched = _rbexpr.str_contains(pattern_expr._rbexpr, literal, strict)
      Utils.wrap_expr(matched)
    end

    # Check if string values end with a substring.
    #
    # @param sub [String]
    #   Suffix substring.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"fruits" => ["apple", "mango", nil]})
    #   df.with_column(
    #     Polars.col("fruits").str.ends_with("go").alias("has_suffix")
    #   )
    #   # =>
    #   # shape: (3, 2)
    #   # ┌────────┬────────────┐
    #   # │ fruits ┆ has_suffix │
    #   # │ ---    ┆ ---        │
    #   # │ str    ┆ bool       │
    #   # ╞════════╪════════════╡
    #   # │ apple  ┆ false      │
    #   # │ mango  ┆ true       │
    #   # │ null   ┆ null       │
    #   # └────────┴────────────┘
    #
    # @example Using `ends_with` as a filter condition:
    #   df.filter(Polars.col("fruits").str.ends_with("go"))
    #   # =>
    #   # shape: (1, 1)
    #   # ┌────────┐
    #   # │ fruits │
    #   # │ ---    │
    #   # │ str    │
    #   # ╞════════╡
    #   # │ mango  │
    #   # └────────┘
    def ends_with(sub)
      # Normalize the suffix via the helper and pass only its `_rbexpr` on.
      suffix_expr = Utils.expr_to_lit_or_expr(sub, str_to_lit: true)
      matched = _rbexpr.str_ends_with(suffix_expr._rbexpr)
      Utils.wrap_expr(matched)
    end

    # Check if string values start with a substring.
    #
    # @param sub [String]
    #   Prefix substring.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"fruits" => ["apple", "mango", nil]})
    #   df.with_column(
    #     Polars.col("fruits").str.starts_with("app").alias("has_prefix")
    #   )
    #   # =>
    #   # shape: (3, 2)
    #   # ┌────────┬────────────┐
    #   # │ fruits ┆ has_prefix │
    #   # │ ---    ┆ ---        │
    #   # │ str    ┆ bool       │
    #   # ╞════════╪════════════╡
    #   # │ apple  ┆ true       │
    #   # │ mango  ┆ false      │
    #   # │ null   ┆ null       │
    #   # └────────┴────────────┘
    #
    # @example Using `starts_with` as a filter condition:
    #   df.filter(Polars.col("fruits").str.starts_with("app"))
    #   # =>
    #   # shape: (1, 1)
    #   # ┌────────┐
    #   # │ fruits │
    #   # │ ---    │
    #   # │ str    │
    #   # ╞════════╡
    #   # │ apple  │
    #   # └────────┘
    def starts_with(sub)
      # Normalize the prefix via the helper and pass only its `_rbexpr` on.
      prefix_expr = Utils.expr_to_lit_or_expr(sub, str_to_lit: true)
      matched = _rbexpr.str_starts_with(prefix_expr._rbexpr)
      Utils.wrap_expr(matched)
    end

    # Parse string values as JSON.
    #
    # Throws an error if an invalid JSON string is encountered.
    #
    # @param dtype [Object]
    #   The dtype to cast the extracted value to. If nil, the dtype will be
    #   inferred from the JSON value.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"json" => ['{"a":1, "b": true}', nil, '{"a":2, "b": false}']}
    #   )
    #   dtype = Polars::Struct.new([Polars::Field.new("a", Polars::Int64), Polars::Field.new("b", Polars::Boolean)])
    #   df.select(Polars.col("json").str.json_extract(dtype))
    #   # =>
    #   # shape: (3, 1)
    #   # ┌─────────────┐
    #   # │ json        │
    #   # │ ---         │
    #   # │ struct[2]   │
    #   # ╞═════════════╡
    #   # │ {1,true}    │
    #   # │ {null,null} │
    #   # │ {2,false}   │
    #   # └─────────────┘
    def json_extract(dtype = nil, infer_schema_length: 100)
      # Only convert the dtype when the caller supplied one; nil is passed through.
      dtype = Utils.rb_type_to_dtype(dtype) unless dtype.nil?

      extracted = _rbexpr.str_json_extract(dtype, infer_schema_length)
      Utils.wrap_expr(extracted)
    end

    # Extract the first match of json string with provided JSONPath expression.
    #
    # Throws an error if an invalid JSON string is encountered.
    # All returned values are cast to Utf8 regardless of the original value.
    #
    # Documentation on JSONPath standard can be found
    # [here](https://goessner.net/articles/JsonPath/).
    #
    # @param json_path [String]
    #   A valid JSON path query string.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {"json_val" => ['{"a":"1"}', nil, '{"a":2}', '{"a":2.1}', '{"a":true}']}
    #   )
    #   df.select(Polars.col("json_val").str.json_path_match("$.a"))
    #   # =>
    #   # shape: (5, 1)
    #   # ┌──────────┐
    #   # │ json_val │
    #   # │ ---      │
    #   # │ str      │
    #   # ╞══════════╡
    #   # │ 1        │
    #   # │ null     │
    #   # │ 2        │
    #   # │ 2.1      │
    #   # │ true     │
    #   # └──────────┘
    def json_path_match(json_path)
      # The path string is handed to the underlying expression unchanged.
      matched = _rbexpr.str_json_path_match(json_path)
      Utils.wrap_expr(matched)
    end

    # Decode a value using the provided encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    # @param strict [Boolean]
    #   How to handle invalid inputs:
    #
    #   - `true`: An error will be thrown if unable to decode a value.
    #   - `false`: Unhandled values will be replaced with `nil`.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"encoded" => ["666f6f", "626172", nil]})
    #   df.select(Polars.col("encoded").str.decode("hex"))
    #   # =>
    #   # shape: (3, 1)
    #   # ┌───────────────┐
    #   # │ encoded       │
    #   # │ ---           │
    #   # │ binary        │
    #   # ╞═══════════════╡
    #   # │ [binary data] │
    #   # │ [binary data] │
    #   # │ null          │
    #   # └───────────────┘
    def decode(encoding, strict: true)
      # Pick the decoder matching the requested encoding; `strict` is forwarded
      # to the underlying expression in both cases.
      if encoding == "hex"
        Utils.wrap_expr(_rbexpr.str_hex_decode(strict))
      elsif encoding == "base64"
        Utils.wrap_expr(_rbexpr.str_base64_decode(strict))
      else
        # Ruby interpolation needs no brace escaping, so the allowed set is
        # written with single braces (doubled braces would be printed literally).
        raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
      end
    end

    # Encode a value using the provided encoding.
    #
    # @param encoding ["hex", "base64"]
    #   The encoding to use.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"strings" => ["foo", "bar", nil]})
    #   df.select(Polars.col("strings").str.encode("hex"))
    #   # =>
    #   # shape: (3, 1)
    #   # ┌─────────┐
    #   # │ strings │
    #   # │ ---     │
    #   # │ str     │
    #   # ╞═════════╡
    #   # │ 666f6f  │
    #   # │ 626172  │
    #   # │ null    │
    #   # └─────────┘
    def encode(encoding)
      # Pick the encoder matching the requested encoding.
      if encoding == "hex"
        Utils.wrap_expr(_rbexpr.str_hex_encode)
      elsif encoding == "base64"
        Utils.wrap_expr(_rbexpr.str_base64_encode)
      else
        # Ruby interpolation needs no brace escaping, so the allowed set is
        # written with single braces (doubled braces would be printed literally).
        raise ArgumentError, "encoding must be one of {'hex', 'base64'}, got #{encoding}"
      end
    end

    # Extract the target capture group from provided patterns.
    #
    # @param pattern [String]
    #   A valid regex pattern
    # @param group_index [Integer]
    #   Index of the targeted capture group.
    #   Group 0 means the whole pattern; the first capture group begins at index 1.
    #   Defaults to the first capture group.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => ["123 bla 45 asd", "xyz 678 910t"]})
    #   df.select(
    #     [
    #       Polars.col("foo").str.extract('(\d+)')
    #     ]
    #   )
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ foo │
    #   # │ --- │
    #   # │ str │
    #   # ╞═════╡
    #   # │ 123 │
    #   # │ 678 │
    #   # └─────┘
    def extract(pattern, group_index: 1)
      # Pattern and group index are handed to the underlying expression as given.
      captured = _rbexpr.str_extract(pattern, group_index)
      Utils.wrap_expr(captured)
    end

    # Extracts all matches for the given regex pattern.
    #
    # Extracts each successive non-overlapping regex match in an individual string as
    # an array.
    #
    # @param pattern [String]
    #   A valid regex pattern
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => ["123 bla 45 asd", "xyz 678 910t"]})
    #   df.select(
    #     [
    #       Polars.col("foo").str.extract_all('(\d+)').alias("extracted_nrs")
    #     ]
    #   )
    #   # =>
    #   # shape: (2, 1)
    #   # ┌────────────────┐
    #   # │ extracted_nrs  │
    #   # │ ---            │
    #   # │ list[str]      │
    #   # ╞════════════════╡
    #   # │ ["123", "45"]  │
    #   # │ ["678", "910"] │
    #   # └────────────────┘
    def extract_all(pattern)
      # Normalize the pattern via the helper and pass only its `_rbexpr` on.
      pattern_expr = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
      matches = _rbexpr.str_extract_all(pattern_expr._rbexpr)
      Utils.wrap_expr(matches)
    end

    # Count all successive non-overlapping regex matches.
    #
    # @param pattern [String]
    #   A valid regex pattern
    # @param literal [Boolean]
    #   Treat pattern as a literal string.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"foo" => ["123 bla 45 asd", "xyz 678 910t"]})
    #   df.select(
    #     [
    #       Polars.col("foo").str.count_match('\d').alias("count_digits")
    #     ]
    #   )
    #   # =>
    #   # shape: (2, 1)
    #   # ┌──────────────┐
    #   # │ count_digits │
    #   # │ ---          │
    #   # │ u32          │
    #   # ╞══════════════╡
    #   # │ 5            │
    #   # │ 6            │
    #   # └──────────────┘
    def count_matches(pattern, literal: false)
      # Normalize the pattern through the shared parser (`str_as_lit: true`).
      pattern_expr = Utils.parse_as_expression(pattern, str_as_lit: true)
      counted = _rbexpr.str_count_matches(pattern_expr, literal)
      Utils.wrap_expr(counted)
    end
    alias_method :count_match, :count_matches

    # Split the string by a substring.
    #
    # @param by [String]
    #   Substring to split by.
    # @param inclusive [Boolean]
    #   If true, include the split character/string in the results.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"s" => ["foo bar", "foo-bar", "foo bar baz"]})
    #   df.select(Polars.col("s").str.split(" "))
    #   # =>
    #   # shape: (3, 1)
    #   # ┌───────────────────────┐
    #   # │ s                     │
    #   # │ ---                   │
    #   # │ list[str]             │
    #   # ╞═══════════════════════╡
    #   # │ ["foo", "bar"]        │
    #   # │ ["foo-bar"]           │
    #   # │ ["foo", "bar", "baz"] │
    #   # └───────────────────────┘
    def split(by, inclusive: false)
      separator = Utils.parse_as_expression(by, str_as_lit: true)

      # `inclusive` selects the variant that includes the separator in the results.
      pieces = inclusive ? _rbexpr.str_split_inclusive(separator) : _rbexpr.str_split(separator)
      Utils.wrap_expr(pieces)
    end

    # Split the string by a substring using `n` splits.
    #
    # Results in a struct of `n+1` fields.
    #
    # If it cannot make `n` splits, the remaining field elements will be null.
    #
    # @param by [String]
    #   Substring to split by.
    # @param n [Integer]
    #   Number of splits to make.
    # @param inclusive [Boolean]
    #   If true, include the split character/string in the results.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"x" => ["a_1", nil, "c", "d_4"]})
    #   df.select(
    #     [
    #       Polars.col("x").str.split_exact("_", 1).alias("fields")
    #     ]
    #   )
    #   # =>
    #   # shape: (4, 1)
    #   # ┌─────────────┐
    #   # │ fields      │
    #   # │ ---         │
    #   # │ struct[2]   │
    #   # ╞═════════════╡
    #   # │ {"a","1"}   │
    #   # │ {null,null} │
    #   # │ {"c",null}  │
    #   # │ {"d","4"}   │
    #   # └─────────────┘
    def split_exact(by, n, inclusive: false)
      separator = Utils.parse_as_expression(by, str_as_lit: true)

      # `inclusive` selects the variant that includes the separator in the results.
      fields =
        if inclusive
          _rbexpr.str_split_exact_inclusive(separator, n)
        else
          _rbexpr.str_split_exact(separator, n)
        end
      Utils.wrap_expr(fields)
    end

    # Split the string by a substring, restricted to returning at most `n` items.
    #
    # If the number of possible splits is less than `n-1`, the remaining field
    # elements will be null. If the number of possible splits is `n-1` or greater,
    # the last (nth) substring will contain the remainder of the string.
    #
    # @param by [String]
    #   Substring to split by.
    # @param n [Integer]
    #   Max number of items to return.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"s" => ["foo bar", nil, "foo-bar", "foo bar baz"]})
    #   df.select(Polars.col("s").str.splitn(" ", 2).alias("fields"))
    #   # =>
    #   # shape: (4, 1)
    #   # ┌───────────────────┐
    #   # │ fields            │
    #   # │ ---               │
    #   # │ struct[2]         │
    #   # ╞═══════════════════╡
    #   # │ {"foo","bar"}     │
    #   # │ {null,null}       │
    #   # │ {"foo-bar",null}  │
    #   # │ {"foo","bar baz"} │
    #   # └───────────────────┘
    def splitn(by, n)
      # Normalize the separator through the shared parser (`str_as_lit: true`).
      separator = Utils.parse_as_expression(by, str_as_lit: true)
      fields = _rbexpr.str_splitn(separator, n)
      Utils.wrap_expr(fields)
    end

    # Replace first matching regex/literal substring with a new string value.
    #
    # @param pattern [String]
    #   Regex pattern.
    # @param value [String]
    #   Replacement string.
    # @param literal [Boolean]
    #   Treat pattern as a literal string.
    # @param n [Integer]
    #   Number of matches to replace. Defaults to 1 (the first match only).
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"id" => [1, 2], "text" => ["123abc", "abc456"]})
    #   df.with_column(
    #     Polars.col("text").str.replace('abc\b', "ABC")
    #   )
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬────────┐
    #   # │ id  ┆ text   │
    #   # │ --- ┆ ---    │
    #   # │ i64 ┆ str    │
    #   # ╞═════╪════════╡
    #   # │ 1   ┆ 123ABC │
    #   # │ 2   ┆ abc456 │
    #   # └─────┴────────┘
    def replace(pattern, value, literal: false, n: 1)
      # Normalize both arguments via the helper; only their `_rbexpr` values are
      # passed to the underlying expression.
      pattern_expr = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
      value_expr = Utils.expr_to_lit_or_expr(value, str_to_lit: true)

      replaced = _rbexpr.str_replace_n(pattern_expr._rbexpr, value_expr._rbexpr, literal, n)
      Utils.wrap_expr(replaced)
    end

    # Replace all matching regex/literal substrings with a new string value.
    #
    # @param pattern [String]
    #   Regex pattern.
    # @param value [String]
    #   Replacement string.
    # @param literal [Boolean]
    #   Treat pattern as a literal string.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"id" => [1, 2], "text" => ["abcabc", "123a123"]})
    #   df.with_column(Polars.col("text").str.replace_all("a", "-"))
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────────┐
    #   # │ id  ┆ text    │
    #   # │ --- ┆ ---     │
    #   # │ i64 ┆ str     │
    #   # ╞═════╪═════════╡
    #   # │ 1   ┆ -bc-bc  │
    #   # │ 2   ┆ 123-123 │
    #   # └─────┴─────────┘
    def replace_all(pattern, value, literal: false)
      # Convert both arguments up front, then forward their native handles
      # along with the `literal` flag.
      pattern_expr = Utils.expr_to_lit_or_expr(pattern, str_to_lit: true)
      value_expr = Utils.expr_to_lit_or_expr(value, str_to_lit: true)
      native_args = [pattern_expr._rbexpr, value_expr._rbexpr, literal]
      Utils.wrap_expr(_rbexpr.str_replace_all(*native_args))
    end

    # Create subslices of the string values of a Utf8 Series.
    #
    # @param offset [Integer]
    #   Start index. Negative indexing is supported.
    # @param length [Integer]
    #   Length of the slice. If set to `nil` (default), the slice is taken to the
    #   end of the string.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"s" => ["pear", nil, "papaya", "dragonfruit"]})
    #   df.with_column(
    #     Polars.col("s").str.slice(-3).alias("s_sliced")
    #   )
    #   # =>
    #   # shape: (4, 2)
    #   # ┌─────────────┬──────────┐
    #   # │ s           ┆ s_sliced │
    #   # │ ---         ┆ ---      │
    #   # │ str         ┆ str      │
    #   # ╞═════════════╪══════════╡
    #   # │ pear        ┆ ear      │
    #   # │ null        ┆ null     │
    #   # │ papaya      ┆ aya      │
    #   # │ dragonfruit ┆ uit      │
    #   # └─────────────┴──────────┘
    def slice(offset, length = nil)
      # `length` is forwarded unchanged, including the nil default.
      sliced = _rbexpr.str_slice(offset, length)
      Utils.wrap_expr(sliced)
    end

    # Returns a column with a separate row for every string character.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"a" => ["foo", "bar"]})
    #   df.select(Polars.col("a").str.explode)
    #   # =>
    #   # shape: (6, 1)
    #   # ┌─────┐
    #   # │ a   │
    #   # │ --- │
    #   # │ str │
    #   # ╞═════╡
    #   # │ f   │
    #   # │ o   │
    #   # │ o   │
    #   # │ b   │
    #   # │ a   │
    #   # │ r   │
    #   # └─────┘
    def explode
      # Delegate to the native expression and re-wrap the result.
      exploded = _rbexpr.str_explode
      Utils.wrap_expr(exploded)
    end

    # Convert a Utf8 column into an Int64 column with base radix.
    #
    # @param base [Integer]
    #   Positive integer which is the base of the string we are parsing.
    #   Default: 10.
    # @param strict [Boolean]
    #   If true (default), raise any ParseError or overflow as a ComputeError.
    #   If false, silently convert such values to null.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
    #   df.with_columns(Polars.col("bin").str.to_integer(base: 2, strict: false).alias("parsed"))
    #   # =>
    #   # shape: (4, 2)
    #   # ┌─────────┬────────┐
    #   # │ bin     ┆ parsed │
    #   # │ ---     ┆ ---    │
    #   # │ str     ┆ i64    │
    #   # ╞═════════╪════════╡
    #   # │ 110     ┆ 6      │
    #   # │ 101     ┆ 5      │
    #   # │ 010     ┆ 2      │
    #   # │ invalid ┆ null   │
    #   # └─────────┴────────┘
    #
    # @example
    #   df = Polars::DataFrame.new({"hex" => ["fa1e", "ff00", "cafe", nil]})
    #   df.with_columns(Polars.col("hex").str.to_integer(base: 16, strict: true).alias("parsed"))
    #   # =>
    #   # shape: (4, 2)
    #   # ┌──────┬────────┐
    #   # │ hex  ┆ parsed │
    #   # │ ---  ┆ ---    │
    #   # │ str  ┆ i64    │
    #   # ╞══════╪════════╡
    #   # │ fa1e ┆ 64030  │
    #   # │ ff00 ┆ 65280  │
    #   # │ cafe ┆ 51966  │
    #   # │ null ┆ null   │
    #   # └──────┴────────┘
    def to_integer(base: 10, strict: true)
      # Both options are passed positionally to the native call.
      parsed = _rbexpr.str_to_integer(base, strict)
      Utils.wrap_expr(parsed)
    end

    # Parse integers with base radix from strings.
    #
    # By default base 2. With `strict: false`, parse errors and overflows become nulls instead of raising.
    #
    # @param radix [Integer]
    #   Positive integer which is the base of the string we are parsing.
    #   Default: 2.
    # @param strict [Boolean]
    #   If true (default), raise any ParseError or overflow as a ComputeError.
    #   If false, silently convert such values to null.
    #
    # @return [Expr]
    #
    # @example
    #   df = Polars::DataFrame.new({"bin" => ["110", "101", "010", "invalid"]})
    #   df.select(Polars.col("bin").str.parse_int(2, strict: false))
    #   # =>
    #   # shape: (4, 1)
    #   # ┌──────┐
    #   # │ bin  │
    #   # │ ---  │
    #   # │ i32  │
    #   # ╞══════╡
    #   # │ 6    │
    #   # │ 5    │
    #   # │ 2    │
    #   # │ null │
    #   # └──────┘
    def parse_int(radix = 2, strict: true)
      # Forward the caller's radix: the previous hard-coded `base: 2` silently
      # ignored any other value, so e.g. `parse_int(16)` still parsed as binary.
      # The cast to Int32 uses the same `strict` setting as the parse itself.
      to_integer(base: radix, strict: strict).cast(Int32, strict: strict)
    end

    private

    def _validate_format_argument(format)
      # TODO: validate the format string. Currently a placeholder that accepts
      # any value and performs no checks.
      nil
    end
  end
end