lib/polars/expr.rb in polars-df-0.1.3 vs lib/polars/expr.rb in polars-df-0.1.4
- old
+ new
@@ -136,12 +136,49 @@
# @return [Expr]
def -@
  # Negation is expressed as subtracting this expression from a literal zero.
  zero = Utils.lit(0)
  zero - self
end
- # def to_physical
- # end
+ # Cast to physical representation of the logical dtype.
+ #
+ # - `:date` -> `:i32`
+ # - `:datetime` -> `:i64`
+ # - `:time` -> `:i64`
+ # - `:duration` -> `:i64`
+ # - `:cat` -> `:u32`
+ # - Other data types will be left unchanged.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # Polars::DataFrame.new({"vals" => ["a", "x", nil, "a"]}).with_columns(
+ # [
+ # Polars.col("vals").cast(:cat),
+ # Polars.col("vals")
+ # .cast(:cat)
+ # .to_physical
+ # .alias("vals_physical")
+ # ]
+ # )
+ # # =>
+ # # shape: (4, 2)
+ # # ┌──────┬───────────────┐
+ # # │ vals ┆ vals_physical │
+ # # │ --- ┆ --- │
+ # # │ cat ┆ u32 │
+ # # ╞══════╪═══════════════╡
+ # # │ a ┆ 0 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ x ┆ 1 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ null ┆ null │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ a ┆ 0 │
+ # # └──────┴───────────────┘
+ def to_physical
+ wrap_expr(_rbexpr.to_physical)
+ end
# Check if any boolean value in a Boolean column is `true`.
#
# @return [Boolean]
#
@@ -256,17 +293,86 @@
# # └──────────┘
def exp
  # Delegate to the native `exp`, then wrap the result.
  native = _rbexpr.exp
  wrap_expr(native)
end
+ # Rename the output of an expression.
+ #
+ # @param name [String]
+ # New name.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [1, 2, 3],
+ # "b" => ["a", "b", nil]
+ # }
+ # )
+ # df.select(
+ # [
+ # Polars.col("a").alias("bar"),
+ # Polars.col("b").alias("foo")
+ # ]
+ # )
+ # # =>
+ # # shape: (3, 2)
+ # # ┌─────┬──────┐
+ # # │ bar ┆ foo │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ str │
+ # # ╞═════╪══════╡
+ # # │ 1 ┆ a │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
+ # # │ 2 ┆ b │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
+ # # │ 3 ┆ null │
+ # # └─────┴──────┘
def alias(name)
  # The native binding exposes this operation as `_alias`.
  renamed = _rbexpr._alias(name)
  wrap_expr(renamed)
end
# TODO support symbols for exclude
+ # Exclude certain columns from a wildcard/regex selection.
#
+ # You may also use regexes in the exclude list. They must start with `^` and end
+ # with `$`.
+ #
+ # @param columns [Object]
+ # Column(s) to exclude from selection.
+ # This can be:
+ #
+ # - a column name, or multiple column names
+ # - a regular expression starting with `^` and ending with `$`
+ # - a dtype or multiple dtypes
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "aa" => [1, 2, 3],
+ # "ba" => ["a", "b", nil],
+ # "cc" => [nil, 2.5, 1.5]
+ # }
+ # )
+ # df.select(Polars.all.exclude("ba"))
+ # # =>
+ # # shape: (3, 2)
+ # # ┌─────┬──────┐
+ # # │ aa ┆ cc │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ f64 │
+ # # ╞═════╪══════╡
+ # # │ 1 ┆ null │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
+ # # │ 2 ┆ 2.5 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
+ # # │ 3 ┆ 1.5 │
+ # # └─────┴──────┘
def exclude(columns)
if columns.is_a?(String)
columns = [columns]
return wrap_expr(_rbexpr.exclude(columns))
elsif !columns.is_a?(Array)
@@ -283,18 +389,47 @@
else
wrap_expr(_rbexpr.exclude_dtype(columns))
end
end
+ # Keep the original root name of the expression.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [1, 2],
+ # "b" => [3, 4]
+ # }
+ # )
+ # df.with_columns([(Polars.col("a") * 9).alias("c").keep_name])
+ # # =>
+ # # shape: (2, 2)
+ # # ┌─────┬─────┐
+ # # │ a ┆ b │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞═════╪═════╡
+ # # │ 9 ┆ 3 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ 18 ┆ 4 │
+ # # └─────┴─────┘
def keep_name
  # Delegate to the native `keep_name`, then wrap the result.
  native = _rbexpr.keep_name
  wrap_expr(native)
end
+ # Add a prefix to the root column name of the expression.
+ #
+ # @param prefix [String]
+ # Prefix to add to the root column name.
+ #
+ # @return [Expr]
def prefix(prefix)
  # Forward the prefix to the native expression and wrap the result.
  prefixed = _rbexpr.prefix(prefix)
  wrap_expr(prefixed)
end
+ # Add a suffix to the root column name of the expression.
+ #
+ # @param suffix [String]
+ # Suffix to add to the root column name.
+ #
+ # @return [Expr]
def suffix(suffix)
  # Forward the suffix to the native expression and wrap the result.
  suffixed = _rbexpr.suffix(suffix)
  wrap_expr(suffixed)
end
# def map_alias
@@ -462,18 +597,116 @@
# # └───────┴───────┘
def is_infinite
  # Delegate to the native `is_infinite`, then wrap the result.
  mask = _rbexpr.is_infinite
  wrap_expr(mask)
end
+ # Returns a boolean Series indicating which values are NaN.
+ #
+ # @note
+ # Floating point `NaN` (Not A Number) should not be confused
+ # with missing data represented as `nil`.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [1, 2, nil, 1, 5],
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
+ # }
+ # )
+ # df.with_column(Polars.col(Polars::Float64).is_nan.suffix("_isnan"))
+ # # =>
+ # # shape: (5, 3)
+ # # ┌──────┬─────┬─────────┐
+ # # │ a ┆ b ┆ b_isnan │
+ # # │ --- ┆ --- ┆ --- │
+ # # │ i64 ┆ f64 ┆ bool │
+ # # ╞══════╪═════╪═════════╡
+ # # │ 1 ┆ 1.0 ┆ false │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+ # # │ 2 ┆ 2.0 ┆ false │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+ # # │ null ┆ NaN ┆ true │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+ # # │ 1 ┆ 1.0 ┆ false │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌┤
+ # # │ 5 ┆ 5.0 ┆ false │
+ # # └──────┴─────┴─────────┘
def is_nan
  # Delegate to the native `is_nan`, then wrap the result.
  mask = _rbexpr.is_nan
  wrap_expr(mask)
end
+ # Returns a boolean Series indicating which values are not NaN.
+ #
+ # @note
+ # Floating point `NaN` (Not A Number) should not be confused
+ # with missing data represented as `nil`.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [1, 2, nil, 1, 5],
+ # "b" => [1.0, 2.0, Float::NAN, 1.0, 5.0]
+ # }
+ # )
+ # df.with_column(Polars.col(Polars::Float64).is_not_nan.suffix("_is_not_nan"))
+ # # =>
+ # # shape: (5, 3)
+ # # ┌──────┬─────┬──────────────┐
+ # # │ a ┆ b ┆ b_is_not_nan │
+ # # │ --- ┆ --- ┆ --- │
+ # # │ i64 ┆ f64 ┆ bool │
+ # # ╞══════╪═════╪══════════════╡
+ # # │ 1 ┆ 1.0 ┆ true │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 2 ┆ 2.0 ┆ true │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ null ┆ NaN ┆ false │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1 ┆ 1.0 ┆ true │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 5 ┆ 5.0 ┆ true │
+ # # └──────┴─────┴──────────────┘
def is_not_nan
  # Delegate to the native `is_not_nan`, then wrap the result.
  mask = _rbexpr.is_not_nan
  wrap_expr(mask)
end
+ # Get the group indexes of the group by operation.
+ #
+ # Should be used in aggregation context only.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "group" => [
+ # "one",
+ # "one",
+ # "one",
+ # "two",
+ # "two",
+ # "two"
+ # ],
+ # "value" => [94, 95, 96, 97, 97, 99]
+ # }
+ # )
+ # df.groupby("group", maintain_order: true).agg(Polars.col("value").agg_groups)
+ # # =>
+ # # shape: (2, 2)
+ # # ┌───────┬───────────┐
+ # # │ group ┆ value │
+ # # │ --- ┆ --- │
+ # # │ str ┆ list[u32] │
+ # # ╞═══════╪═══════════╡
+ # # │ one ┆ [0, 1, 2] │
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ two ┆ [3, 4, 5] │
+ # # └───────┴───────────┘
def agg_groups
  # Delegate to the native `agg_groups`, then wrap the result.
  groups = _rbexpr.agg_groups
  wrap_expr(groups)
end
# Count the number of values in this expression.
@@ -555,21 +788,51 @@
length = Polars.lit(length)
end
wrap_expr(_rbexpr.slice(offset._rbexpr, length._rbexpr))
end
+ # Append expressions.
+ #
+ # This is done by adding the chunks of `other` to this `Series`.
+ #
+ # @param other [Expr]
+ # Expression to append.
+ # @param upcast [Boolean]
+ # Cast both `Series` to the same supertype.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [8, 9, 10],
+ # "b" => [nil, 4, 4]
+ # }
+ # )
+ # df.select(Polars.all.head(1).append(Polars.all.tail(1)))
+ # # =>
+ # # shape: (2, 2)
+ # # ┌─────┬──────┐
+ # # │ a ┆ b │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞═════╪══════╡
+ # # │ 8 ┆ null │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌┤
+ # # │ 10 ┆ 4 │
+ # # └─────┴──────┘
def append(other, upcast: true)
  # Coerce `other` through Utils, then pass its native expression to `append`.
  other_expr = Utils.expr_to_lit_or_expr(other)
  appended = _rbexpr.append(other_expr._rbexpr, upcast)
  wrap_expr(appended)
end
# Create a single chunk of memory for this Series.
#
# @return [Expr]
#
# @example Create a Series with 3 nulls, append column a then rechunk
- # df = Polars::DataFrame.new({"a": [1, 1, 2]})
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
# df.select(Polars.repeat(nil, 3).append(Polars.col("a")).rechunk)
# # =>
# # shape: (6, 1)
# # ┌─────────┐
# # │ literal │
@@ -648,26 +911,186 @@
# # └──────┘
def drop_nans
  # Delegate to the native `drop_nans`, then wrap the result.
  native = _rbexpr.drop_nans
  wrap_expr(native)
end
+ # Get an array with the cumulative sum computed at every element.
+ #
+ # @param reverse [Boolean]
+ # Reverse the operation.
+ #
+ # @return [Expr]
+ #
+ # @note
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
+ # `:i64` before summing to prevent overflow issues.
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
+ # df.select(
+ # [
+ # Polars.col("a").cumsum,
+ # Polars.col("a").cumsum(reverse: true).alias("a_reverse")
+ # ]
+ # )
+ # # =>
+ # # shape: (4, 2)
+ # # ┌─────┬───────────┐
+ # # │ a ┆ a_reverse │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞═════╪═══════════╡
+ # # │ 1 ┆ 10 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 3 ┆ 9 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 6 ┆ 7 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 10 ┆ 4 │
+ # # └─────┴───────────┘
def cumsum(reverse: false)
  # Pass the `reverse` flag positionally to the native `cumsum`.
  running = _rbexpr.cumsum(reverse)
  wrap_expr(running)
end
+ # Get an array with the cumulative product computed at every element.
+ #
+ # @param reverse [Boolean]
+ # Reverse the operation.
+ #
+ # @return [Expr]
+ #
+ # @note
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
+ # `:i64` before multiplying to prevent overflow issues.
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
+ # df.select(
+ # [
+ # Polars.col("a").cumprod,
+ # Polars.col("a").cumprod(reverse: true).alias("a_reverse")
+ # ]
+ # )
+ # # =>
+ # # shape: (4, 2)
+ # # ┌─────┬───────────┐
+ # # │ a ┆ a_reverse │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞═════╪═══════════╡
+ # # │ 1 ┆ 24 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 2 ┆ 24 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 6 ┆ 12 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 24 ┆ 4 │
+ # # └─────┴───────────┘
def cumprod(reverse: false)
  # Pass the `reverse` flag positionally to the native `cumprod`.
  running = _rbexpr.cumprod(reverse)
  wrap_expr(running)
end
+ # Get an array with the cumulative min computed at every element.
+ #
+ # @param reverse [Boolean]
+ # Reverse the operation.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
+ # df.select(
+ # [
+ # Polars.col("a").cummin,
+ # Polars.col("a").cummin(reverse: true).alias("a_reverse")
+ # ]
+ # )
+ # # =>
+ # # shape: (4, 2)
+ # # ┌─────┬───────────┐
+ # # │ a ┆ a_reverse │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞═════╪═══════════╡
+ # # │ 1 ┆ 1 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1 ┆ 2 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1 ┆ 3 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1 ┆ 4 │
+ # # └─────┴───────────┘
def cummin(reverse: false)
  # Pass the `reverse` flag positionally to the native `cummin`.
  running = _rbexpr.cummin(reverse)
  wrap_expr(running)
end
+ # Get an array with the cumulative max computed at every element.
+ #
+ # @param reverse [Boolean]
+ # Reverse the operation.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
+ # df.select(
+ # [
+ # Polars.col("a").cummax,
+ # Polars.col("a").cummax(reverse: true).alias("a_reverse")
+ # ]
+ # )
+ # # =>
+ # # shape: (4, 2)
+ # # ┌─────┬───────────┐
+ # # │ a ┆ a_reverse │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞═════╪═══════════╡
+ # # │ 1 ┆ 4 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 2 ┆ 4 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 3 ┆ 4 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 4 ┆ 4 │
+ # # └─────┴───────────┘
def cummax(reverse: false)
  # Pass the `reverse` flag positionally to the native `cummax`.
  running = _rbexpr.cummax(reverse)
  wrap_expr(running)
end
+ # Get an array with the cumulative count computed at every element.
+ #
+ # Counting from 0 to `len - 1`.
+ #
+ # @param reverse [Boolean]
+ # Reverse the operation.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 4]})
+ # df.select(
+ # [
+ # Polars.col("a").cumcount,
+ # Polars.col("a").cumcount(reverse: true).alias("a_reverse")
+ # ]
+ # )
+ # # =>
+ # # shape: (4, 2)
+ # # ┌─────┬───────────┐
+ # # │ a ┆ a_reverse │
+ # # │ --- ┆ --- │
+ # # │ u32 ┆ u32 │
+ # # ╞═════╪═══════════╡
+ # # │ 0 ┆ 3 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1 ┆ 2 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 2 ┆ 1 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 3 ┆ 0 │
+ # # └─────┴───────────┘
def cumcount(reverse: false)
  # Pass the `reverse` flag positionally to the native `cumcount`.
  running = _rbexpr.cumcount(reverse)
  wrap_expr(running)
end
# Rounds down to the nearest integer value.
@@ -753,10 +1176,34 @@
# # └─────┘
def round(decimals = 0)
  # Forward the number of decimals to the native `round`.
  rounded = _rbexpr.round(decimals)
  wrap_expr(rounded)
end
+ # Compute the dot/inner product between two Expressions.
+ #
+ # @param other [Expr]
+ # Expression to compute dot product with.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [1, 3, 5],
+ # "b" => [2, 4, 6]
+ # }
+ # )
+ # df.select(Polars.col("a").dot(Polars.col("b")))
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═════╡
+ # # │ 44 │
+ # # └─────┘
def dot(other)
  # Coerce `other` through Utils with `str_to_lit: false`.
  other_expr = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
  product = _rbexpr.dot(other_expr._rbexpr)
  wrap_expr(product)
end
@@ -787,23 +1234,210 @@
# # └─────┴─────┘
def mode
  # Delegate to the native `mode`, then wrap the result.
  native = _rbexpr.mode
  wrap_expr(native)
end
+ # Cast between data types.
+ #
+ # @param dtype [Symbol]
+ # DataType to cast to.
+ # @param strict [Boolean]
+ # Throw an error if a cast could not be done.
+ # For instance, due to an overflow.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [1, 2, 3],
+ # "b" => ["4", "5", "6"]
+ # }
+ # )
+ # df.with_columns(
+ # [
+ # Polars.col("a").cast(:f64),
+ # Polars.col("b").cast(:i32)
+ # ]
+ # )
+ # # =>
+ # # shape: (3, 2)
+ # # ┌─────┬─────┐
+ # # │ a ┆ b │
+ # # │ --- ┆ --- │
+ # # │ f64 ┆ i32 │
+ # # ╞═════╪═════╡
+ # # │ 1.0 ┆ 4 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ 2.0 ┆ 5 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ 3.0 ┆ 6 │
+ # # └─────┴─────┘
def cast(dtype, strict: true)
  # Translate the requested type via Utils before handing it to the native cast.
  target = Utils.rb_type_to_dtype(dtype)
  casted = _rbexpr.cast(target, strict)
  wrap_expr(casted)
end
+ # Sort this column. In projection/selection context the whole column is sorted.
+ #
+ # If used in a groupby context, the groups are sorted.
+ #
+ # @param reverse [Boolean]
+ # false -> order from small to large.
+ # true -> order from large to small.
+ # @param nulls_last [Boolean]
+ # If true, nulls are considered to be larger than any valid value.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "group" => [
+ # "one",
+ # "one",
+ # "one",
+ # "two",
+ # "two",
+ # "two"
+ # ],
+ # "value" => [1, 98, 2, 3, 99, 4]
+ # }
+ # )
+ # df.select(Polars.col("value").sort)
+ # # =>
+ # # shape: (6, 1)
+ # # ┌───────┐
+ # # │ value │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═══════╡
+ # # │ 1 │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ 2 │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ 3 │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ 4 │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ 98 │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ 99 │
+ # # └───────┘
+ #
+ # @example
+ # df.select(Polars.col("value").sort)
+ # # =>
+ # # shape: (6, 1)
+ # # ┌───────┐
+ # # │ value │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═══════╡
+ # # │ 1 │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ 2 │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ 3 │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ 4 │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ 98 │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ 99 │
+ # # └───────┘
+ #
+ # @example
+ # df.groupby("group").agg(Polars.col("value").sort)
+ # # =>
+ # # shape: (2, 2)
+ # # ┌───────┬────────────┐
+ # # │ group ┆ value │
+ # # │ --- ┆ --- │
+ # # │ str ┆ list[i64] │
+ # # ╞═══════╪════════════╡
+ # # │ two ┆ [3, 4, 99] │
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ one ┆ [1, 2, 98] │
+ # # └───────┴────────────┘
def sort(reverse: false, nulls_last: false)
  # The native binding exposes this as `sort_with(reverse, nulls_last)`.
  sorted = _rbexpr.sort_with(reverse, nulls_last)
  wrap_expr(sorted)
end
+ # Return the `k` largest elements.
+ #
+ # If `reverse: true`, the smallest elements will be given.
+ #
+ # @param k [Integer]
+ # Number of elements to return.
+ # @param reverse [Boolean]
+ # Return the smallest elements.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "value" => [1, 98, 2, 3, 99, 4]
+ # }
+ # )
+ # df.select(
+ # [
+ # Polars.col("value").top_k.alias("top_k"),
+ # Polars.col("value").top_k(reverse: true).alias("bottom_k")
+ # ]
+ # )
+ # # =>
+ # # shape: (5, 2)
+ # # ┌───────┬──────────┐
+ # # │ top_k ┆ bottom_k │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞═══════╪══════════╡
+ # # │ 99 ┆ 1 │
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 98 ┆ 2 │
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 4 ┆ 3 │
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 3 ┆ 4 │
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 2 ┆ 98 │
+ # # └───────┴──────────┘
def top_k(k: 5, reverse: false)
  # Keyword arguments are passed positionally to the native `top_k`.
  selected = _rbexpr.top_k(k, reverse)
  wrap_expr(selected)
end
+ # Get the index values that would sort this column.
+ #
+ # @param reverse [Boolean]
+ # Sort in reverse (descending) order.
+ # @param nulls_last [Boolean]
+ # Place null values last instead of first.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [20, 10, 30]
+ # }
+ # )
+ # df.select(Polars.col("a").arg_sort)
+ # # =>
+ # # shape: (3, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ u32 │
+ # # ╞═════╡
+ # # │ 1 │
+ # # ├╌╌╌╌╌┤
+ # # │ 0 │
+ # # ├╌╌╌╌╌┤
+ # # │ 2 │
+ # # └─────┘
def arg_sort(reverse: false, nulls_last: false)
  # Keyword arguments are passed positionally to the native `arg_sort`.
  indices = _rbexpr.arg_sort(reverse, nulls_last)
  wrap_expr(indices)
end
# Get the index of the maximal value.
@@ -852,15 +1486,91 @@
# # └─────┘
def arg_min
  # Delegate to the native `arg_min`, then wrap the result.
  index = _rbexpr.arg_min
  wrap_expr(index)
end
+ # Find indices where elements should be inserted to maintain order.
+ #
+ # @param element [Object]
+ # Expression or scalar value.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "values" => [1, 2, 3, 5]
+ # }
+ # )
+ # df.select(
+ # [
+ # Polars.col("values").search_sorted(0).alias("zero"),
+ # Polars.col("values").search_sorted(3).alias("three"),
+ # Polars.col("values").search_sorted(6).alias("six")
+ # ]
+ # )
+ # # =>
+ # # shape: (1, 3)
+ # # ┌──────┬───────┬─────┐
+ # # │ zero ┆ three ┆ six │
+ # # │ --- ┆ --- ┆ --- │
+ # # │ u32 ┆ u32 ┆ u32 │
+ # # ╞══════╪═══════╪═════╡
+ # # │ 0 ┆ 2 ┆ 4 │
+ # # └──────┴───────┴─────┘
def search_sorted(element)
  # Coerce `element` through Utils with `str_to_lit: false`.
  element_expr = Utils.expr_to_lit_or_expr(element, str_to_lit: false)
  position = _rbexpr.search_sorted(element_expr._rbexpr)
  wrap_expr(position)
end
+ # Sort this column by the ordering of another column, or multiple other columns.
+ #
+ # In projection/selection context the whole column is sorted.
+ # If used in a groupby context, the groups are sorted.
+ #
+ # @param by [Object]
+ # The column(s) used for sorting.
+ # @param reverse [Boolean]
+ # false -> order from small to large.
+ # true -> order from large to small.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "group" => [
+ # "one",
+ # "one",
+ # "one",
+ # "two",
+ # "two",
+ # "two"
+ # ],
+ # "value" => [1, 98, 2, 3, 99, 4]
+ # }
+ # )
+ # df.select(Polars.col("group").sort_by("value"))
+ # # =>
+ # # shape: (6, 1)
+ # # ┌───────┐
+ # # │ group │
+ # # │ --- │
+ # # │ str │
+ # # ╞═══════╡
+ # # │ one │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ one │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ two │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ two │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ one │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ two │
+ # # └───────┘
def sort_by(by, reverse: false)
if !by.is_a?(Array)
by = [by]
end
if !reverse.is_a?(Array)
@@ -869,10 +1579,43 @@
by = Utils.selection_to_rbexpr_list(by)
wrap_expr(_rbexpr.sort_by(by, reverse))
end
+ # Take values by index.
+ #
+ # @param indices [Expr]
+ # An expression that leads to a `:u32` dtyped Series.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "group" => [
+ # "one",
+ # "one",
+ # "one",
+ # "two",
+ # "two",
+ # "two"
+ # ],
+ # "value" => [1, 98, 2, 3, 99, 4]
+ # }
+ # )
+ # df.groupby("group", maintain_order: true).agg(Polars.col("value").take(1))
+ # # =>
+ # # shape: (2, 2)
+ # # ┌───────┬───────┐
+ # # │ group ┆ value │
+ # # │ --- ┆ --- │
+ # # │ str ┆ i64 │
+ # # ╞═══════╪═══════╡
+ # # │ one ┆ 98 │
+ # # ├╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+ # # │ two ┆ 99 │
+ # # └───────┴───────┘
def take(indices)
if indices.is_a?(Array)
indices_lit = Polars.lit(Series.new("", indices, dtype: :u32))
else
indices_lit = Utils.expr_to_lit_or_expr(indices, str_to_lit: false)
@@ -907,15 +1650,109 @@
# # └──────┘
def shift(periods = 1)
  # Forward the number of periods to the native `shift`.
  shifted = _rbexpr.shift(periods)
  wrap_expr(shifted)
end
+ # Shift the values by a given period and fill the resulting null values.
+ #
+ # @param periods [Integer]
+ # Number of places to shift (may be negative).
+ # @param fill_value [Object]
+ # Fill nil values with the result of this expression.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
+ # df.select(Polars.col("foo").shift_and_fill(1, "a"))
+ # # =>
+ # # shape: (4, 1)
+ # # ┌─────┐
+ # # │ foo │
+ # # │ --- │
+ # # │ str │
+ # # ╞═════╡
+ # # │ a │
+ # # ├╌╌╌╌╌┤
+ # # │ 1 │
+ # # ├╌╌╌╌╌┤
+ # # │ 2 │
+ # # ├╌╌╌╌╌┤
+ # # │ 3 │
+ # # └─────┘
def shift_and_fill(periods, fill_value)
  # Coerce the fill value through Utils with `str_to_lit: true`.
  fill_expr = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
  shifted = _rbexpr.shift_and_fill(periods, fill_expr._rbexpr)
  wrap_expr(shifted)
end
+ # Fill null values using the specified value or strategy.
+ #
+ # To interpolate over null values see interpolate.
+ #
+ # @param value [Object]
+ # Value used to fill null values.
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
+ # Strategy used to fill null values.
+ # @param limit [Integer]
+ # Number of consecutive null values to fill when using the 'forward' or
+ # 'backward' strategy.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [1, 2, nil],
+ # "b" => [4, nil, 6]
+ # }
+ # )
+ # df.fill_null(strategy: "zero")
+ # # =>
+ # # shape: (3, 2)
+ # # ┌─────┬─────┐
+ # # │ a ┆ b │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞═════╪═════╡
+ # # │ 1 ┆ 4 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ 2 ┆ 0 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ 0 ┆ 6 │
+ # # └─────┴─────┘
+ #
+ # @example
+ # df.fill_null(99)
+ # # =>
+ # # shape: (3, 2)
+ # # ┌─────┬─────┐
+ # # │ a ┆ b │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞═════╪═════╡
+ # # │ 1 ┆ 4 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ 2 ┆ 99 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ 99 ┆ 6 │
+ # # └─────┴─────┘
+ #
+ # @example
+ # df.fill_null(strategy: "forward")
+ # # =>
+ # # shape: (3, 2)
+ # # ┌─────┬─────┐
+ # # │ a ┆ b │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞═════╪═════╡
+ # # │ 1 ┆ 4 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ 2 ┆ 4 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ 2 ┆ 6 │
+ # # └─────┴─────┘
def fill_null(value = nil, strategy: nil, limit: nil)
if !value.nil? && !strategy.nil?
raise ArgumentError, "cannot specify both 'value' and 'strategy'."
elsif value.nil? && strategy.nil?
raise ArgumentError, "must specify either a fill 'value' or 'strategy'"
@@ -929,399 +1766,3232 @@
else
wrap_expr(_rbexpr.fill_null_with_strategy(strategy, limit))
end
end
+ # Fill floating point NaN values with a fill value.
+ #
+ # @param fill_value [Object]
+ # Value used to replace NaN values.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [1.0, nil, Float::NAN],
+ # "b" => [4.0, Float::NAN, 6]
+ # }
+ # )
+ # df.fill_nan("zero")
+ # # =>
+ # # shape: (3, 2)
+ # # ┌──────┬──────┐
+ # # │ a ┆ b │
+ # # │ --- ┆ --- │
+ # # │ str ┆ str │
+ # # ╞══════╪══════╡
+ # # │ 1.0 ┆ 4.0 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+ # # │ null ┆ zero │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌┤
+ # # │ zero ┆ 6.0 │
+ # # └──────┴──────┘
def fill_nan(fill_value)
  # Coerce the fill value through Utils with `str_to_lit: true`.
  fill_expr = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true)
  filled = _rbexpr.fill_nan(fill_expr._rbexpr)
  wrap_expr(filled)
end
+ # Fill missing values with the latest seen values.
+ #
+ # @param limit [Integer]
+ # The number of consecutive null values to forward fill.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [1, 2, nil],
+ # "b" => [4, nil, 6]
+ # }
+ # )
+ # df.select(Polars.all.forward_fill)
+ # # =>
+ # # shape: (3, 2)
+ # # ┌─────┬─────┐
+ # # │ a ┆ b │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞═════╪═════╡
+ # # │ 1 ┆ 4 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ 2 ┆ 4 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ 2 ┆ 6 │
+ # # └─────┴─────┘
def forward_fill(limit: nil)
  # Pass `limit` positionally to the native `forward_fill`.
  filled = _rbexpr.forward_fill(limit)
  wrap_expr(filled)
end
+ # Fill missing values with the next to be seen values.
+ #
+ # @param limit [Integer]
+ # The number of consecutive null values to backward fill.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [1, 2, nil],
+ # "b" => [4, nil, 6]
+ # }
+ # )
+ # df.select(Polars.all.backward_fill)
+ # # =>
+ # # shape: (3, 2)
+ # # ┌──────┬─────┐
+ # # │ a ┆ b │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞══════╪═════╡
+ # # │ 1 ┆ 4 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ 2 ┆ 6 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ null ┆ 6 │
+ # # └──────┴─────┘
def backward_fill(limit: nil)
  # Pass `limit` positionally to the native `backward_fill`.
  filled = _rbexpr.backward_fill(limit)
  wrap_expr(filled)
end
+ # Reverse the selection.
+ #
+ # @return [Expr]
def reverse
  # Delegate to the native `reverse`, then wrap the result.
  reversed = _rbexpr.reverse
  wrap_expr(reversed)
end
+ # Get standard deviation.
+ #
+ # @param ddof [Integer]
+ # Degrees of freedom.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
+ # df.select(Polars.col("a").std)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ 1.0 │
+ # # └─────┘
def std(ddof: 1)
  # Pass `ddof` positionally to the native `std`.
  deviation = _rbexpr.std(ddof)
  wrap_expr(deviation)
end
+ # Get variance.
+ #
+ # @param ddof [Integer]
+ # Degrees of freedom.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
+ # df.select(Polars.col("a").var)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ 1.0 │
+ # # └─────┘
def var(ddof: 1)
  # Pass `ddof` positionally to the native `var`.
  variance = _rbexpr.var(ddof)
  wrap_expr(variance)
end
+ # Get maximum value.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
+ # df.select(Polars.col("a").max)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ 1.0 │
+ # # └─────┘
def max
  # Delegate to the native `max`, then wrap the result.
  native = _rbexpr.max
  wrap_expr(native)
end
+ # Get minimum value.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [-1.0, Float::NAN, 1.0]})
+ # df.select(Polars.col("a").min)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌──────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════╡
+ # # │ -1.0 │
+ # # └──────┘
def min
  # Delegate to the native `min`, then wrap the result.
  native = _rbexpr.min
  wrap_expr(native)
end
+ # Get maximum value, but propagate/poison encountered NaN values.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
+ # df.select(Polars.col("a").nan_max)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ NaN │
+ # # └─────┘
def nan_max
  # Delegate to the native `nan_max`, then wrap the result.
  native = _rbexpr.nan_max
  wrap_expr(native)
end
+ # Get minimum value, but propagate/poison encountered NaN values.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [0.0, Float::NAN]})
+ # df.select(Polars.col("a").nan_min)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ NaN │
+ # # └─────┘
def nan_min
  # Delegate to the native `nan_min`, then wrap the result.
  native = _rbexpr.nan_min
  wrap_expr(native)
end
+ # Get sum value.
+ #
+ # @return [Expr]
+ #
+ # @note
+ # Dtypes in `:i8`, `:u8`, `:i16`, and `:u16` are cast to
+ # `:i64` before summing to prevent overflow issues.
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
+ # df.select(Polars.col("a").sum)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═════╡
+ # # │ 0 │
+ # # └─────┘
def sum
  # Delegate to the native `sum`, then wrap the result.
  total = _rbexpr.sum
  wrap_expr(total)
end
+ # Get mean value.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
+ # df.select(Polars.col("a").mean)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ 0.0 │
+ # # └─────┘
def mean
  # Delegate to the native `mean`, then wrap the result.
  average = _rbexpr.mean
  wrap_expr(average)
end
+ # Get median value using linear interpolation.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [-1, 0, 1]})
+ # df.select(Polars.col("a").median)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ 0.0 │
+ # # └─────┘
def median
  # Delegate to the native `median`, then wrap the result.
  middle = _rbexpr.median
  wrap_expr(middle)
end
+ # Compute the product of an expression.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
+ # df.select(Polars.col("a").product)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═════╡
+ # # │ 6 │
+ # # └─────┘
def product
  # Delegate to the native `product`, then wrap the result.
  native = _rbexpr.product
  wrap_expr(native)
end
+ # Count unique values.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
+ # df.select(Polars.col("a").n_unique)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ u32 │
+ # # ╞═════╡
+ # # │ 2 │
+ # # └─────┘
def n_unique
  # Delegate to the native `n_unique`, then wrap the result.
  distinct = _rbexpr.n_unique
  wrap_expr(distinct)
end
+ # Count null values.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [nil, 1, nil],
+ # "b" => [1, 2, 3]
+ # }
+ # )
+ # df.select(Polars.all.null_count)
+ # # =>
+ # # shape: (1, 2)
+ # # ┌─────┬─────┐
+ # # │ a ┆ b │
+ # # │ --- ┆ --- │
+ # # │ u32 ┆ u32 │
+ # # ╞═════╪═════╡
+ # # │ 2 ┆ 0 │
+ # # └─────┴─────┘
def null_count
  # Delegate to the native `null_count`, then wrap the result.
  nulls = _rbexpr.null_count
  wrap_expr(nulls)
end
+ # Get index of first unique value.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [8, 9, 10],
+ # "b" => [nil, 4, 4]
+ # }
+ # )
+ # df.select(Polars.col("a").arg_unique)
+ # # =>
+ # # shape: (3, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ u32 │
+ # # ╞═════╡
+ # # │ 0 │
+ # # ├╌╌╌╌╌┤
+ # # │ 1 │
+ # # ├╌╌╌╌╌┤
+ # # │ 2 │
+ # # └─────┘
+ #
+ # @example
+ # df.select(Polars.col("b").arg_unique)
+ # # =>
+ # # shape: (2, 1)
+ # # ┌─────┐
+ # # │ b │
+ # # │ --- │
+ # # │ u32 │
+ # # ╞═════╡
+ # # │ 0 │
+ # # ├╌╌╌╌╌┤
+ # # │ 1 │
+ # # └─────┘
def arg_unique
  # Delegate to the native `arg_unique`, then wrap the result.
  indices = _rbexpr.arg_unique
  wrap_expr(indices)
end
+ # Get unique values of this expression.
+ #
+ # @param maintain_order [Boolean]
+ # Maintain order of data. This requires more work.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
+ # df.select(Polars.col("a").unique(maintain_order: true))
+ # # =>
+ # # shape: (2, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═════╡
+ # # │ 1 │
+ # # ├╌╌╌╌╌┤
+ # # │ 2 │
+ # # └─────┘
def unique(maintain_order: false)
  # Use the order-preserving native variant only when `maintain_order` is truthy.
  deduped = maintain_order ? _rbexpr.unique_stable : _rbexpr.unique
  wrap_expr(deduped)
end
+ # Get the first value.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
+ # df.select(Polars.col("a").first)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═════╡
+ # # │ 1 │
+ # # └─────┘
def first
  # Delegate to the native `first`, then wrap the result.
  head = _rbexpr.first
  wrap_expr(head)
end
+ # Get the last value.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
+ # df.select(Polars.col("a").last)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═════╡
+ # # │ 2 │
+ # # └─────┘
def last
  # Delegate to the native `last`, then wrap the result.
  tail = _rbexpr.last
  wrap_expr(tail)
end
+ # Apply window function over a subgroup.
+ #
+ # This is similar to a groupby + aggregation + self join.
+ # Or similar to [window functions in Postgres](https://www.postgresql.org/docs/current/tutorial-window.html).
+ #
+ # @param expr [Object]
+ # Column(s) to group by.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "groups" => ["g1", "g1", "g2"],
+ # "values" => [1, 2, 3]
+ # }
+ # )
+ # df.with_column(
+ # Polars.col("values").max.over("groups").alias("max_by_group")
+ # )
+ # # =>
+ # # shape: (3, 3)
+ # # ┌────────┬────────┬──────────────┐
+ # # │ groups ┆ values ┆ max_by_group │
+ # # │ --- ┆ --- ┆ --- │
+ # # │ str ┆ i64 ┆ i64 │
+ # # ╞════════╪════════╪══════════════╡
+ # # │ g1 ┆ 1 ┆ 2 │
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ g1 ┆ 2 ┆ 2 │
+ # # ├╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ g2 ┆ 3 ┆ 3 │
+ # # └────────┴────────┴──────────────┘
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "groups" => [1, 1, 2, 2, 1, 2, 3, 3, 1],
+ # "values" => [1, 2, 3, 4, 5, 6, 7, 8, 8]
+ # }
+ # )
+ # df.lazy
+ # .select([Polars.col("groups").sum.over("groups")])
+ # .collect
+ # # =>
+ # # shape: (9, 1)
+ # # ┌────────┐
+ # # │ groups │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞════════╡
+ # # │ 4 │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ 4 │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ 6 │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ 6 │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ ... │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ 6 │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ 6 │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ 6 │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ 4 │
+ # # └────────┘
def over(expr)
  # Convert the selection into a list of native expressions via Utils.
  partition_exprs = Utils.selection_to_rbexpr_list(expr)
  windowed = _rbexpr.over(partition_exprs)
  wrap_expr(windowed)
end
+ # Get mask of unique values.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
+ # df.select(Polars.col("a").is_unique)
+ # # =>
+ # # shape: (3, 1)
+ # # ┌───────┐
+ # # │ a │
+ # # │ --- │
+ # # │ bool │
+ # # ╞═══════╡
+ # # │ false │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ false │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ true │
+ # # └───────┘
def is_unique
  # Ask the underlying `_rbexpr` for the mask, then wrap it.
  mask = _rbexpr.is_unique
  wrap_expr(mask)
end
+ # Get a mask of the first unique value.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "num" => [1, 2, 3, 1, 5]
+ # }
+ # )
+ # df.with_column(Polars.col("num").is_first.alias("is_first"))
+ # # =>
+ # # shape: (5, 2)
+ # # ┌─────┬──────────┐
+ # # │ num ┆ is_first │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ bool │
+ # # ╞═════╪══════════╡
+ # # │ 1 ┆ true │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 2 ┆ true │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 3 ┆ true │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1 ┆ false │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 5 ┆ true │
+ # # └─────┴──────────┘
def is_first
  # Ask the underlying `_rbexpr` for the mask, then wrap it.
  mask = _rbexpr.is_first
  wrap_expr(mask)
end
+ # Get mask of duplicated values.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 1, 2]})
+ # df.select(Polars.col("a").is_duplicated)
+ # # =>
+ # # shape: (3, 1)
+ # # ┌───────┐
+ # # │ a │
+ # # │ --- │
+ # # │ bool │
+ # # ╞═══════╡
+ # # │ true │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ true │
+ # # ├╌╌╌╌╌╌╌┤
+ # # │ false │
+ # # └───────┘
def is_duplicated
  # Ask the underlying `_rbexpr` for the mask, then wrap it.
  mask = _rbexpr.is_duplicated
  wrap_expr(mask)
end
+ # Get quantile value.
+ #
+ # @param quantile [Float]
+ # Quantile between 0.0 and 1.0.
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
+ # Interpolation method.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [0, 1, 2, 3, 4, 5]})
+ # df.select(Polars.col("a").quantile(0.3))
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ 1.0 │
+ # # └─────┘
+ #
+ # @example
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "higher"))
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ 2.0 │
+ # # └─────┘
+ #
+ # @example
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "lower"))
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ 1.0 │
+ # # └─────┘
+ #
+ # @example
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "midpoint"))
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ 1.5 │
+ # # └─────┘
+ #
+ # @example
+ # df.select(Polars.col("a").quantile(0.3, interpolation: "linear"))
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ 1.5 │
+ # # └─────┘
def quantile(quantile, interpolation: "nearest")
  # Both arguments are forwarded positionally to the underlying `_rbexpr`.
  result = _rbexpr.quantile(quantile, interpolation)
  wrap_expr(result)
end
+ # Filter a single column.
+ #
+ # Mostly useful in an aggregation context. If you want to filter on a DataFrame
+ # level, use `LazyFrame#filter`.
+ #
+ # @param predicate [Expr]
+ # Boolean expression.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "group_col" => ["g1", "g1", "g2"],
+ # "b" => [1, 2, 3]
+ # }
+ # )
+ # (
+ # df.groupby("group_col").agg(
+ # [
+ # Polars.col("b").filter(Polars.col("b") < 2).sum.alias("lt"),
+ # Polars.col("b").filter(Polars.col("b") >= 2).sum.alias("gte")
+ # ]
+ # )
+ # ).sort("group_col")
+ # # =>
+ # # shape: (2, 3)
+ # # ┌───────────┬──────┬─────┐
+ # # │ group_col ┆ lt ┆ gte │
+ # # │ --- ┆ --- ┆ --- │
+ # # │ str ┆ i64 ┆ i64 │
+ # # ╞═══════════╪══════╪═════╡
+ # # │ g1 ┆ 1 ┆ 2 │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ g2 ┆ null ┆ 3 │
+ # # └───────────┴──────┴─────┘
def filter(predicate)
  # The predicate is unwrapped to its own `_rbexpr` before being applied.
  receiver = _rbexpr
  wrap_expr(receiver.filter(predicate._rbexpr))
end
+ # Filter a single column.
+ #
+ # Alias for {#filter}.
+ #
+ # @param predicate [Expr]
+ # Boolean expression.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "group_col" => ["g1", "g1", "g2"],
+ # "b" => [1, 2, 3]
+ # }
+ # )
+ # (
+ # df.groupby("group_col").agg(
+ # [
+ # Polars.col("b").where(Polars.col("b") < 2).sum.alias("lt"),
+ # Polars.col("b").where(Polars.col("b") >= 2).sum.alias("gte")
+ # ]
+ # )
+ # ).sort("group_col")
+ # # =>
+ # # shape: (2, 3)
+ # # ┌───────────┬──────┬─────┐
+ # # │ group_col ┆ lt ┆ gte │
+ # # │ --- ┆ --- ┆ --- │
+ # # │ str ┆ i64 ┆ i64 │
+ # # ╞═══════════╪══════╪═════╡
+ # # │ g1 ┆ 1 ┆ 2 │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ g2 ┆ null ┆ 3 │
+ # # └───────────┴──────┴─────┘
def where(predicate)
  # Pure alias: the predicate is forwarded to #filter unchanged.
  filtered = filter(predicate)
  filtered
end
- # def map
+ # Apply a custom Ruby function to a Series or sequence of Series.
+ #
+ # The output of this custom function must be a Series.
+ # If you want to apply a custom function elementwise over single values, see
+ # {#apply}. A use case for `map` is when you want to transform an
+ # expression with a third-party library.
+ #
+ # Read more in [the book](https://pola-rs.github.io/polars-book/user-guide/dsl/custom_functions.html).
+ #
+ # @param return_dtype [Symbol]
+ # Dtype of the output Series.
+ # @param agg_list [Boolean]
+ # Aggregate list.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "sine" => [0.0, 1.0, 0.0, -1.0],
+ # "cosine" => [1.0, 0.0, -1.0, 0.0]
+ # }
+ # )
+ # df.select(Polars.all.map { |x| x.to_numpy.argmax })
+ # # =>
+ # # shape: (1, 2)
+ # # ┌──────┬────────┐
+ # # │ sine ┆ cosine │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞══════╪════════╡
+ # # │ 1 ┆ 0 │
+ # # └──────┴────────┘
+ # def map(return_dtype: nil, agg_list: false, &block)
+ # if !return_dtype.nil?
+ # return_dtype = Utils.rb_type_to_dtype(return_dtype)
+ # end
+ # wrap_expr(_rbexpr.map(return_dtype, agg_list, &block))
# end
# def apply
# end
+ # Explode a list or utf8 Series. This means that every item is expanded to a new
+ # row.
#
+ # Alias for {#explode}.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => ["hello", "world"]})
+ # df.select(Polars.col("foo").flatten)
+ # # =>
+ # # shape: (10, 1)
+ # # ┌─────┐
+ # # │ foo │
+ # # │ --- │
+ # # │ str │
+ # # ╞═════╡
+ # # │ h │
+ # # ├╌╌╌╌╌┤
+ # # │ e │
+ # # ├╌╌╌╌╌┤
+ # # │ l │
+ # # ├╌╌╌╌╌┤
+ # # │ l │
+ # # ├╌╌╌╌╌┤
+ # # │ ... │
+ # # ├╌╌╌╌╌┤
+ # # │ o │
+ # # ├╌╌╌╌╌┤
+ # # │ r │
+ # # ├╌╌╌╌╌┤
+ # # │ l │
+ # # ├╌╌╌╌╌┤
+ # # │ d │
+ # # └─────┘
def flatten
  # Same call as #explode: the underlying `_rbexpr.explode` result is wrapped.
  exploded = _rbexpr.explode
  wrap_expr(exploded)
end
+ # Explode a list or utf8 Series.
+ #
+ # This means that every item is expanded to a new row.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"b" => [[1, 2, 3], [4, 5, 6]]})
+ # df.select(Polars.col("b").explode)
+ # # =>
+ # # shape: (6, 1)
+ # # ┌─────┐
+ # # │ b │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═════╡
+ # # │ 1 │
+ # # ├╌╌╌╌╌┤
+ # # │ 2 │
+ # # ├╌╌╌╌╌┤
+ # # │ 3 │
+ # # ├╌╌╌╌╌┤
+ # # │ 4 │
+ # # ├╌╌╌╌╌┤
+ # # │ 5 │
+ # # ├╌╌╌╌╌┤
+ # # │ 6 │
+ # # └─────┘
def explode
  # Delegate to the underlying `_rbexpr` and wrap the result.
  exploded = _rbexpr.explode
  wrap_expr(exploded)
end
+ # Take every nth value in the Series and return as a new Series.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
+ # df.select(Polars.col("foo").take_every(3))
+ # # =>
+ # # shape: (3, 1)
+ # # ┌─────┐
+ # # │ foo │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═════╡
+ # # │ 1 │
+ # # ├╌╌╌╌╌┤
+ # # │ 4 │
+ # # ├╌╌╌╌╌┤
+ # # │ 7 │
+ # # └─────┘
def take_every(n)
  # `n` is forwarded as-is to the underlying `_rbexpr`.
  sampled = _rbexpr.take_every(n)
  wrap_expr(sampled)
end
+ # Get the first `n` rows.
+ #
+ # @param n [Integer]
+ # Number of rows to return.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
+ # df.select(Polars.col("foo").head(3))
+ # # =>
+ # # shape: (3, 1)
+ # # ┌─────┐
+ # # │ foo │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═════╡
+ # # │ 1 │
+ # # ├╌╌╌╌╌┤
+ # # │ 2 │
+ # # ├╌╌╌╌╌┤
+ # # │ 3 │
+ # # └─────┘
def head(n = 10)
  # `n` (default 10) is forwarded as-is to the underlying `_rbexpr`.
  leading = _rbexpr.head(n)
  wrap_expr(leading)
end
+ # Get the last `n` rows.
+ #
+ # @param n [Integer]
+ # Number of rows to return.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7]})
+ # df.select(Polars.col("foo").tail(3))
+ # # =>
+ # # shape: (3, 1)
+ # # ┌─────┐
+ # # │ foo │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═════╡
+ # # │ 5 │
+ # # ├╌╌╌╌╌┤
+ # # │ 6 │
+ # # ├╌╌╌╌╌┤
+ # # │ 7 │
+ # # └─────┘
def tail(n = 10)
  # `n` (default 10) is forwarded as-is to the underlying `_rbexpr`.
  trailing = _rbexpr.tail(n)
  wrap_expr(trailing)
end
+ # Get the first `n` rows.
+ #
+ # Alias for {#head}.
+ #
+ # @param n [Integer]
+ # Number of rows to return.
+ #
+ # @return [Expr]
def limit(n = 10)
  # Pure alias: `n` is forwarded to #head unchanged.
  limited = head(n)
  limited
end
+ # Raise expression to the power of exponent.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4]})
+ # df.select(Polars.col("foo").pow(3))
+ # # =>
+ # # shape: (4, 1)
+ # # ┌──────┐
+ # # │ foo │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════╡
+ # # │ 1.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 8.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 27.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 64.0 │
+ # # └──────┘
def pow(exponent)
  # Normalize the exponent through Utils.expr_to_lit_or_expr, then pass
  # its `_rbexpr` to the underlying expression.
  rhs = Utils.expr_to_lit_or_expr(exponent)
  wrap_expr(_rbexpr.pow(rhs._rbexpr))
end
- # def is_in
- # end
+ # Check if elements of this expression are present in the other Series.
+ #
+ # @param other [Object]
+ # Series or sequence of primitive type.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {"sets" => [[1, 2, 3], [1, 2], [9, 10]], "optional_members" => [1, 2, 3]}
+ # )
+ # df.select([Polars.col("optional_members").is_in("sets").alias("contains")])
+ # # =>
+ # # shape: (3, 1)
+ # # ┌──────────┐
+ # # │ contains │
+ # # │ --- │
+ # # │ bool │
+ # # ╞══════════╡
+ # # │ true │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ true │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ false │
+ # # └──────────┘
+ def is_in(other)
+ if other.is_a?(Array)
+ if other.length == 0
+ other = Polars.lit(nil)
+ else
+ other = Polars.lit(Series.new(other))
+ end
+ else
+ other = Utils.expr_to_lit_or_expr(other, str_to_lit: false)
+ end
+ wrap_expr(_rbexpr.is_in(other._rbexpr))
+ end
+ # Repeat the elements in this Series as specified in the given expression.
#
+ # The repeated elements are expanded into a `List`.
+ #
+ # @param by [Object]
+ # Numeric column that determines how often the values will be repeated.
+ # The column will be coerced to UInt32. Give this dtype to make the coercion a
+ # no-op.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => ["x", "y", "z"],
+ # "n" => [1, 2, 3]
+ # }
+ # )
+ # df.select(Polars.col("a").repeat_by("n"))
+ # # =>
+ # # shape: (3, 1)
+ # # ┌─────────────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ list[str] │
+ # # ╞═════════════════╡
+ # # │ ["x"] │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ ["y", "y"] │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ ["z", "z", "z"] │
+ # # └─────────────────┘
def repeat_by(by)
# 0.1.4 hunk: the second argument to Utils.expr_to_lit_or_expr changes from a
# positional `false` to the `str_to_lit: false` keyword.
# NOTE(review): presumably this keeps a String `by` as a column reference
# rather than a string literal — confirm against Utils.expr_to_lit_or_expr.
- by = Utils.expr_to_lit_or_expr(by, false)
+ by = Utils.expr_to_lit_or_expr(by, str_to_lit: false)
wrap_expr(_rbexpr.repeat_by(by._rbexpr))
end
- # def is_between
- # end
+ # Check if this expression is between start and end.
+ #
+ # @param start [Object]
+ # Lower bound as primitive type or datetime.
+ # @param _end [Object]
+ # Upper bound as primitive type or datetime.
+ # @param include_bounds [Boolean]
+ # False: Exclude both start and end (default).
+ # True: Include both start and end.
+ # (False, False): Exclude start and exclude end.
+ # (True, True): Include start and include end.
+ # (False, True): Exclude start and include end.
+ # (True, False): Include start and exclude end.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"num" => [1, 2, 3, 4, 5]})
+ # df.with_column(Polars.col("num").is_between(2, 4))
+ # # =>
+ # # shape: (5, 2)
+ # # ┌─────┬────────────┐
+ # # │ num ┆ is_between │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ bool │
+ # # ╞═════╪════════════╡
+ # # │ 1 ┆ false │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 2 ┆ false │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 3 ┆ true │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 4 ┆ false │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 5 ┆ false │
+ # # └─────┴────────────┘
+ def is_between(start, _end, include_bounds: false)
+ if include_bounds == false || include_bounds == [false, false]
+ ((self > start) & (self < _end)).alias("is_between")
+ elsif include_bounds == true || include_bounds == [true, true]
+ ((self >= start) & (self <= _end)).alias("is_between")
+ elsif include_bounds == [false, true]
+ ((self > start) & (self <= _end)).alias("is_between")
+ elsif include_bounds == [true, false]
+ ((self >= start) & (self < _end)).alias("is_between")
+ else
+ raise ArgumentError, "include_bounds should be a bool or [bool, bool]."
+ end
+ end
# def _hash
# end
+ # Reinterpret the underlying bits as a signed/unsigned integer.
#
+ # This operation is only allowed for 64-bit integers. For integers with fewer
+ # bits, you can safely use the cast operation instead.
+ #
+ # @param signed [Boolean]
+ # If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 1, 2], dtype: :u64)
+ # df = Polars::DataFrame.new([s])
+ # df.select(
+ # [
+ # Polars.col("a").reinterpret(signed: true).alias("reinterpreted"),
+ # Polars.col("a").alias("original")
+ # ]
+ # )
+ # # =>
+ # # shape: (3, 2)
+ # # ┌───────────────┬──────────┐
+ # # │ reinterpreted ┆ original │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ u64 │
+ # # ╞═══════════════╪══════════╡
+ # # │ 1 ┆ 1 │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1 ┆ 1 │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 2 ┆ 2 │
+ # # └───────────────┴──────────┘
def reinterpret(signed: false)
  # `signed` (default false) is forwarded positionally to the underlying `_rbexpr`.
  reinterpreted = _rbexpr.reinterpret(signed)
  wrap_expr(reinterpreted)
end
# def _inspect
# end
+ # Fill nulls with linear interpolation over missing values.
#
+ # Can also be used to regrid data to a new grid - see examples below.
+ #
+ # @return [Expr]
+ #
+ # @example Fill nulls with linear interpolation
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [1, nil, 3],
+ # "b" => [1.0, Float::NAN, 3.0]
+ # }
+ # )
+ # df.select(Polars.all.interpolate)
+ # # =>
+ # # shape: (3, 2)
+ # # ┌─────┬─────┐
+ # # │ a ┆ b │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ f64 │
+ # # ╞═════╪═════╡
+ # # │ 1 ┆ 1.0 │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ 2 ┆ NaN │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌┤
+ # # │ 3 ┆ 3.0 │
+ # # └─────┴─────┘
def interpolate
  # Delegate to the underlying `_rbexpr` and wrap the result.
  filled = _rbexpr.interpolate
  wrap_expr(filled)
end
- # def rolling_min
- # end
+ # Apply a rolling min (moving min) over the values in this array.
+ #
+ # A window of length `window_size` will traverse the array. The values that fill
+ # this window will (optionally) be multiplied with the weights given by the
+ # `weights` vector. The resulting values will be aggregated to their min.
+ #
+ # @param window_size [Integer]
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
+ # size indicated by a timedelta or the following string language:
+ #
+ # - 1ns (1 nanosecond)
+ # - 1us (1 microsecond)
+ # - 1ms (1 millisecond)
+ # - 1s (1 second)
+ # - 1m (1 minute)
+ # - 1h (1 hour)
+ # - 1d (1 day)
+ # - 1w (1 week)
+ # - 1mo (1 calendar month)
+ # - 1y (1 calendar year)
+ # - 1i (1 index count)
+ #
+ # If a timedelta or the dynamic string language is used, the `by`
+ # and `closed` arguments must also be set.
+ # @param weights [Array]
+ # An optional slice with the same length as the window that will be multiplied
+ # elementwise with the values in the window.
+ # @param min_periods [Integer]
+ # The number of values in the window that should be non-null before computing
+ # a result. If nil, it will be set equal to window size.
+ # @param center [Boolean]
+ # Set the labels at the center of the window
+ # @param by [String]
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
+ # set the column that will be used to determine the windows. This column must
+ # be of dtype `{Date, Datetime}`
+ # @param closed ["left", "right", "both", "none"]
+ # Define whether the temporal window interval is closed or not.
+ #
+ # @note
+ # This functionality is experimental and may change without it being considered a
+ # breaking change.
+ #
+ # @note
+ # If you want to compute multiple aggregation statistics over the same dynamic
+ # window, consider using `groupby_rolling` this method can cache the window size
+ # computation.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
+ # df.select(
+ # [
+ # Polars.col("A").rolling_min(2)
+ # ]
+ # )
+ # # =>
+ # # shape: (6, 1)
+ # # ┌──────┐
+ # # │ A │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════╡
+ # # │ null │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 1.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 2.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 3.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 4.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 5.0 │
+ # # └──────┘
+ def rolling_min(
+ window_size,
+ weights: nil,
+ min_periods: nil,
+ center: false,
+ by: nil,
+ closed: "left"
+ )
+ window_size, min_periods = _prepare_rolling_window_args(
+ window_size, min_periods
+ )
+ wrap_expr(
+ _rbexpr.rolling_min(
+ window_size, weights, min_periods, center, by, closed
+ )
+ )
+ end
- # def rolling_max
- # end
+ # Apply a rolling max (moving max) over the values in this array.
+ #
+ # A window of length `window_size` will traverse the array. The values that fill
+ # this window will (optionally) be multiplied with the weights given by the
+ # `weights` vector. The resulting values will be aggregated to their max.
+ #
+ # @param window_size [Integer]
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
+ # size indicated by a timedelta or the following string language:
+ #
+ # - 1ns (1 nanosecond)
+ # - 1us (1 microsecond)
+ # - 1ms (1 millisecond)
+ # - 1s (1 second)
+ # - 1m (1 minute)
+ # - 1h (1 hour)
+ # - 1d (1 day)
+ # - 1w (1 week)
+ # - 1mo (1 calendar month)
+ # - 1y (1 calendar year)
+ # - 1i (1 index count)
+ #
+ # If a timedelta or the dynamic string language is used, the `by`
+ # and `closed` arguments must also be set.
+ # @param weights [Array]
+ # An optional slice with the same length as the window that will be multiplied
+ # elementwise with the values in the window.
+ # @param min_periods [Integer]
+ # The number of values in the window that should be non-null before computing
+ # a result. If nil, it will be set equal to window size.
+ # @param center [Boolean]
+ # Set the labels at the center of the window
+ # @param by [String]
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
+ # set the column that will be used to determine the windows. This column must
+ # be of dtype `{Date, Datetime}`
+ # @param closed ["left", "right", "both", "none"]
+ # Define whether the temporal window interval is closed or not.
+ #
+ # @note
+ # This functionality is experimental and may change without it being considered a
+ # breaking change.
+ #
+ # @note
+ # If you want to compute multiple aggregation statistics over the same dynamic
+ # window, consider using `groupby_rolling` this method can cache the window size
+ # computation.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
+ # df.select(
+ # [
+ # Polars.col("A").rolling_max(2)
+ # ]
+ # )
+ # # =>
+ # # shape: (6, 1)
+ # # ┌──────┐
+ # # │ A │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════╡
+ # # │ null │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 2.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 3.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 4.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 5.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 6.0 │
+ # # └──────┘
+ def rolling_max(
+ window_size,
+ weights: nil,
+ min_periods: nil,
+ center: false,
+ by: nil,
+ closed: "left"
+ )
+ window_size, min_periods = _prepare_rolling_window_args(
+ window_size, min_periods
+ )
+ wrap_expr(
+ _rbexpr.rolling_max(
+ window_size, weights, min_periods, center, by, closed
+ )
+ )
+ end
- # def rolling_mean
- # end
+ # Apply a rolling mean (moving mean) over the values in this array.
+ #
+ # A window of length `window_size` will traverse the array. The values that fill
+ # this window will (optionally) be multiplied with the weights given by the
+ # `weights` vector. The resulting values will be aggregated to their mean.
+ #
+ # @param window_size [Integer]
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
+ # size indicated by a timedelta or the following string language:
+ #
+ # - 1ns (1 nanosecond)
+ # - 1us (1 microsecond)
+ # - 1ms (1 millisecond)
+ # - 1s (1 second)
+ # - 1m (1 minute)
+ # - 1h (1 hour)
+ # - 1d (1 day)
+ # - 1w (1 week)
+ # - 1mo (1 calendar month)
+ # - 1y (1 calendar year)
+ # - 1i (1 index count)
+ #
+ # If a timedelta or the dynamic string language is used, the `by`
+ # and `closed` arguments must also be set.
+ # @param weights [Array]
+ # An optional slice with the same length as the window that will be multiplied
+ # elementwise with the values in the window.
+ # @param min_periods [Integer]
+ # The number of values in the window that should be non-null before computing
+ # a result. If nil, it will be set equal to window size.
+ # @param center [Boolean]
+ # Set the labels at the center of the window
+ # @param by [String]
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
+ # set the column that will be used to determine the windows. This column must
+ # be of dtype `{Date, Datetime}`
+ # @param closed ["left", "right", "both", "none"]
+ # Define whether the temporal window interval is closed or not.
+ #
+ # @note
+ # This functionality is experimental and may change without it being considered a
+ # breaking change.
+ #
+ # @note
+ # If you want to compute multiple aggregation statistics over the same dynamic
+ # window, consider using `groupby_rolling` this method can cache the window size
+ # computation.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"A" => [1.0, 8.0, 6.0, 2.0, 16.0, 10.0]})
+ # df.select(
+ # [
+ # Polars.col("A").rolling_mean(2)
+ # ]
+ # )
+ # # =>
+ # # shape: (6, 1)
+ # # ┌──────┐
+ # # │ A │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════╡
+ # # │ null │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 4.5 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 7.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 4.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 9.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 13.0 │
+ # # └──────┘
+ def rolling_mean(
+ window_size,
+ weights: nil,
+ min_periods: nil,
+ center: false,
+ by: nil,
+ closed: "left"
+ )
+ window_size, min_periods = _prepare_rolling_window_args(
+ window_size, min_periods
+ )
+ wrap_expr(
+ _rbexpr.rolling_mean(
+ window_size, weights, min_periods, center, by, closed
+ )
+ )
+ end
- # def rolling_sum
- # end
+ # Apply a rolling sum (moving sum) over the values in this array.
+ #
+ # A window of length `window_size` will traverse the array. The values that fill
+ # this window will (optionally) be multiplied with the weights given by the
+ # `weights` vector. The resulting values will be aggregated to their sum.
+ #
+ # @param window_size [Integer]
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
+ # size indicated by a timedelta or the following string language:
+ #
+ # - 1ns (1 nanosecond)
+ # - 1us (1 microsecond)
+ # - 1ms (1 millisecond)
+ # - 1s (1 second)
+ # - 1m (1 minute)
+ # - 1h (1 hour)
+ # - 1d (1 day)
+ # - 1w (1 week)
+ # - 1mo (1 calendar month)
+ # - 1y (1 calendar year)
+ # - 1i (1 index count)
+ #
+ # If a timedelta or the dynamic string language is used, the `by`
+ # and `closed` arguments must also be set.
+ # @param weights [Array]
+ # An optional slice with the same length as the window that will be multiplied
+ # elementwise with the values in the window.
+ # @param min_periods [Integer]
+ # The number of values in the window that should be non-null before computing
+ # a result. If nil, it will be set equal to window size.
+ # @param center [Boolean]
+ # Set the labels at the center of the window
+ # @param by [String]
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
+ # set the column that will be used to determine the windows. This column must
+ # be of dtype `{Date, Datetime}`
+ # @param closed ["left", "right", "both", "none"]
+ # Define whether the temporal window interval is closed or not.
+ #
+ # @note
+ # This functionality is experimental and may change without it being considered a
+ # breaking change.
+ #
+ # @note
+ # If you want to compute multiple aggregation statistics over the same dynamic
+ # window, consider using `groupby_rolling` this method can cache the window size
+ # computation.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]})
+ # df.select(
+ # [
+ # Polars.col("A").rolling_sum(2)
+ # ]
+ # )
+ # # =>
+ # # shape: (6, 1)
+ # # ┌──────┐
+ # # │ A │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════╡
+ # # │ null │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 3.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 5.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 7.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 9.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 11.0 │
+ # # └──────┘
+ def rolling_sum(
+ window_size,
+ weights: nil,
+ min_periods: nil,
+ center: false,
+ by: nil,
+ closed: "left"
+ )
+ window_size, min_periods = _prepare_rolling_window_args(
+ window_size, min_periods
+ )
+ wrap_expr(
+ _rbexpr.rolling_sum(
+ window_size, weights, min_periods, center, by, closed
+ )
+ )
+ end
- # def rolling_std
- # end
+ # Compute a rolling standard deviation.
+ #
+ # A window of length `window_size` will traverse the array. The values that fill
+ # this window will (optionally) be multiplied with the weights given by the
+ # `weights` vector. The resulting values will be aggregated to their standard deviation.
+ #
+ # @param window_size [Integer]
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
+ # size indicated by a timedelta or the following string language:
+ #
+ # - 1ns (1 nanosecond)
+ # - 1us (1 microsecond)
+ # - 1ms (1 millisecond)
+ # - 1s (1 second)
+ # - 1m (1 minute)
+ # - 1h (1 hour)
+ # - 1d (1 day)
+ # - 1w (1 week)
+ # - 1mo (1 calendar month)
+ # - 1y (1 calendar year)
+ # - 1i (1 index count)
+ #
+ # If a timedelta or the dynamic string language is used, the `by`
+ # and `closed` arguments must also be set.
+ # @param weights [Array]
+ # An optional slice with the same length as the window that will be multiplied
+ # elementwise with the values in the window.
+ # @param min_periods [Integer]
+ # The number of values in the window that should be non-null before computing
+ # a result. If nil, it will be set equal to window size.
+ # @param center [Boolean]
+ # Set the labels at the center of the window
+ # @param by [String]
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
+ # set the column that will be used to determine the windows. This column must
+ # be of dtype `{Date, Datetime}`
+ # @param closed ["left", "right", "both", "none"]
+ # Define whether the temporal window interval is closed or not.
+ #
+ # @note
+ # This functionality is experimental and may change without it being considered a
+ # breaking change.
+ #
+ # @note
+ # If you want to compute multiple aggregation statistics over the same dynamic
+ # window, consider using `groupby_rolling` this method can cache the window size
+ # computation.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
+ # df.select(
+ # [
+ # Polars.col("A").rolling_std(3)
+ # ]
+ # )
+ # # =>
+ # # shape: (6, 1)
+ # # ┌──────────┐
+ # # │ A │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ null │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ null │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1.0 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1.0 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1.527525 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 2.0 │
+ # # └──────────┘
+ def rolling_std(
+ window_size,
+ weights: nil,
+ min_periods: nil,
+ center: false,
+ by: nil,
+ closed: "left"
+ )
+ window_size, min_periods = _prepare_rolling_window_args(
+ window_size, min_periods
+ )
+ wrap_expr(
+ _rbexpr.rolling_std(
+ window_size, weights, min_periods, center, by, closed
+ )
+ )
+ end
- # def rolling_var
- # end
+ # Compute a rolling variance.
+ #
+ # A window of length `window_size` will traverse the array. The values that fill
+ # this window will (optionally) be multiplied with the weights given by the
+ # `weights` vector. The resulting values will be aggregated to their variance.
+ #
+ # @param window_size [Integer]
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
+ # size indicated by a timedelta or the following string language:
+ #
+ # - 1ns (1 nanosecond)
+ # - 1us (1 microsecond)
+ # - 1ms (1 millisecond)
+ # - 1s (1 second)
+ # - 1m (1 minute)
+ # - 1h (1 hour)
+ # - 1d (1 day)
+ # - 1w (1 week)
+ # - 1mo (1 calendar month)
+ # - 1y (1 calendar year)
+ # - 1i (1 index count)
+ #
+ # If a timedelta or the dynamic string language is used, the `by`
+ # and `closed` arguments must also be set.
+ # @param weights [Array]
+ # An optional slice with the same length as the window that will be multiplied
+ # elementwise with the values in the window.
+ # @param min_periods [Integer]
+ # The number of values in the window that should be non-null before computing
+ # a result. If nil, it will be set equal to window size.
+ # @param center [Boolean]
+ # Set the labels at the center of the window
+ # @param by [String]
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
+ # set the column that will be used to determine the windows. This column must
+ # be of dtype `{Date, Datetime}`
+ # @param closed ["left", "right", "both", "none"]
+ # Define whether the temporal window interval is closed or not.
+ #
+ # @note
+ # This functionality is experimental and may change without it being considered a
+ # breaking change.
+ #
+ # @note
+ # If you want to compute multiple aggregation statistics over the same dynamic
+ # window, consider using `groupby_rolling` this method can cache the window size
+ # computation.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
+ # df.select(
+ # [
+ # Polars.col("A").rolling_var(3)
+ # ]
+ # )
+ # # =>
+ # # shape: (6, 1)
+ # # ┌──────────┐
+ # # │ A │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ null │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ null │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1.0 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1.0 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 2.333333 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 4.0 │
+ # # └──────────┘
+ def rolling_var(
+ window_size,
+ weights: nil,
+ min_periods: nil,
+ center: false,
+ by: nil,
+ closed: "left"
+ )
+ window_size, min_periods = _prepare_rolling_window_args(
+ window_size, min_periods
+ )
+ wrap_expr(
+ _rbexpr.rolling_var(
+ window_size, weights, min_periods, center, by, closed
+ )
+ )
+ end
- # def rolling_median
- # end
+ # Compute a rolling median.
+ #
+ # @param window_size [Integer]
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
+ # size indicated by a timedelta or the following string language:
+ #
+ # - 1ns (1 nanosecond)
+ # - 1us (1 microsecond)
+ # - 1ms (1 millisecond)
+ # - 1s (1 second)
+ # - 1m (1 minute)
+ # - 1h (1 hour)
+ # - 1d (1 day)
+ # - 1w (1 week)
+ # - 1mo (1 calendar month)
+ # - 1y (1 calendar year)
+ # - 1i (1 index count)
+ #
+ # If a timedelta or the dynamic string language is used, the `by`
+ # and `closed` arguments must also be set.
+ # @param weights [Array]
+ # An optional slice with the same length as the window that will be multiplied
+ # elementwise with the values in the window.
+ # @param min_periods [Integer]
+ # The number of values in the window that should be non-null before computing
+ # a result. If nil, it will be set equal to window size.
+ # @param center [Boolean]
+ # Set the labels at the center of the window
+ # @param by [String]
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
+ # set the column that will be used to determine the windows. This column must
+ # be of dtype `{Date, Datetime}`
+ # @param closed ["left", "right", "both", "none"]
+ # Define whether the temporal window interval is closed or not.
+ #
+ # @note
+ # This functionality is experimental and may change without it being considered a
+ # breaking change.
+ #
+ # @note
+ # If you want to compute multiple aggregation statistics over the same dynamic
+ # window, consider using `groupby_rolling` this method can cache the window size
+ # computation.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
+ # df.select(
+ # [
+ # Polars.col("A").rolling_median(3)
+ # ]
+ # )
+ # # =>
+ # # shape: (6, 1)
+ # # ┌──────┐
+ # # │ A │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════╡
+ # # │ null │
+ # # ├╌╌╌╌╌╌┤
+ # # │ null │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 2.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 3.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 4.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 6.0 │
+ # # └──────┘
+ def rolling_median(
+ window_size,
+ weights: nil,
+ min_periods: nil,
+ center: false,
+ by: nil,
+ closed: "left"
+ )
+ window_size, min_periods = _prepare_rolling_window_args(
+ window_size, min_periods
+ )
+ wrap_expr(
+ _rbexpr.rolling_median(
+ window_size, weights, min_periods, center, by, closed
+ )
+ )
+ end
- # def rolling_quantile
- # end
+ # Compute a rolling quantile.
+ #
+ # @param quantile [Float]
+ # Quantile between 0.0 and 1.0.
+ # @param interpolation ["nearest", "higher", "lower", "midpoint", "linear"]
+ # Interpolation method.
+ # @param window_size [Integer]
+ # The length of the window. Can be a fixed integer size, or a dynamic temporal
+ # size indicated by a timedelta or the following string language:
+ #
+ # - 1ns (1 nanosecond)
+ # - 1us (1 microsecond)
+ # - 1ms (1 millisecond)
+ # - 1s (1 second)
+ # - 1m (1 minute)
+ # - 1h (1 hour)
+ # - 1d (1 day)
+ # - 1w (1 week)
+ # - 1mo (1 calendar month)
+ # - 1y (1 calendar year)
+ # - 1i (1 index count)
+ #
+ # If a timedelta or the dynamic string language is used, the `by`
+ # and `closed` arguments must also be set.
+ # @param weights [Array]
+ # An optional slice with the same length as the window that will be multiplied
+ # elementwise with the values in the window.
+ # @param min_periods [Integer]
+ # The number of values in the window that should be non-null before computing
+ # a result. If nil, it will be set equal to window size.
+ # @param center [Boolean]
+ # Set the labels at the center of the window
+ # @param by [String]
+ # If the `window_size` is temporal for instance `"5h"` or `"3s"`, you must
+ # set the column that will be used to determine the windows. This column must
+ # be of dtype `{Date, Datetime}`
+ # @param closed ["left", "right", "both", "none"]
+ # Define whether the temporal window interval is closed or not.
+ #
+ # @note
+ # This functionality is experimental and may change without it being considered a
+ # breaking change.
+ #
+ # @note
+ # If you want to compute multiple aggregation statistics over the same dynamic
+ # window, consider using `groupby_rolling` this method can cache the window size
+ # computation.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"A" => [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]})
+ # df.select(
+ # [
+ # Polars.col("A").rolling_quantile(0.33, window_size: 3)
+ # ]
+ # )
+ # # =>
+ # # shape: (6, 1)
+ # # ┌──────┐
+ # # │ A │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════╡
+ # # │ null │
+ # # ├╌╌╌╌╌╌┤
+ # # │ null │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 1.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 2.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 3.0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 4.0 │
+ # # └──────┘
+ def rolling_quantile(
+ quantile,
+ interpolation: "nearest",
+ window_size: 2,
+ weights: nil,
+ min_periods: nil,
+ center: false,
+ by: nil,
+ closed: "left"
+ )
+ window_size, min_periods = _prepare_rolling_window_args(
+ window_size, min_periods
+ )
+ wrap_expr(
+ _rbexpr.rolling_quantile(
+ quantile, interpolation, window_size, weights, min_periods, center, by, closed
+ )
+ )
+ end
# def rolling_apply
# end
+ # Compute a rolling skew.
#
+ # @param window_size [Integer]
+ # Integer size of the rolling window.
+ # @param bias [Boolean]
+ # If false, the calculations are corrected for statistical bias.
+ #
+ # @return [Expr]
def rolling_skew(window_size, bias: true)
  # Delegates to `_rbexpr.rolling_skew` with the window size and bias flag,
  # then passes the result through `wrap_expr`.
  rb_skew = _rbexpr.rolling_skew(window_size, bias)
  wrap_expr(rb_skew)
end
+ # Compute absolute values.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "A" => [-1.0, 0.0, 1.0, 2.0]
+ # }
+ # )
+ # df.select(Polars.col("A").abs)
+ # # =>
+ # # shape: (4, 1)
+ # # ┌─────┐
+ # # │ A │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ 1.0 │
+ # # ├╌╌╌╌╌┤
+ # # │ 0.0 │
+ # # ├╌╌╌╌╌┤
+ # # │ 1.0 │
+ # # ├╌╌╌╌╌┤
+ # # │ 2.0 │
+ # # └─────┘
def abs
  # Delegates to `_rbexpr.abs` and passes the result through `wrap_expr`.
  rb_abs = _rbexpr.abs
  wrap_expr(rb_abs)
end
+ # Get the index values that would sort this column.
+ #
+ # Alias for {#arg_sort}.
+ #
+ # @param reverse [Boolean]
+ # Sort in reverse (descending) order.
+ # @param nulls_last [Boolean]
+ # Place null values last instead of first.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [20, 10, 30]
+ # }
+ # )
+ # df.select(Polars.col("a").argsort)
+ # # =>
+ # # shape: (3, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ u32 │
+ # # ╞═════╡
+ # # │ 1 │
+ # # ├╌╌╌╌╌┤
+ # # │ 0 │
+ # # ├╌╌╌╌╌┤
+ # # │ 2 │
+ # # └─────┘
def argsort(reverse: false, nulls_last: false)
  # Thin alias: both options are forwarded unchanged to `arg_sort`.
  sort_options = {reverse: reverse, nulls_last: nulls_last}
  arg_sort(**sort_options)
end
+ # Assign ranks to data, dealing with ties appropriately.
+ #
+ # @param method ["average", "min", "max", "dense", "ordinal", "random"]
+ # The method used to assign ranks to tied elements.
+ # The following methods are available:
+ #
+ # - 'average' : The average of the ranks that would have been assigned to
+ # all the tied values is assigned to each value.
+ # - 'min' : The minimum of the ranks that would have been assigned to all
+ # the tied values is assigned to each value. (This is also referred to
+ # as "competition" ranking.)
+ # - 'max' : The maximum of the ranks that would have been assigned to all
+ # the tied values is assigned to each value.
+ # - 'dense' : Like 'min', but the rank of the next highest element is
+ # assigned the rank immediately after those assigned to the tied
+ # elements.
+ # - 'ordinal' : All values are given a distinct rank, corresponding to
+ # the order that the values occur in the Series.
+ # - 'random' : Like 'ordinal', but the rank for ties is not dependent
+ # on the order that the values occur in the Series.
+ # @param reverse [Boolean]
+ # Reverse the operation.
+ #
+ # @return [Expr]
+ #
+ # @example The 'average' method:
+ # df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
+ # df.select(Polars.col("a").rank)
+ # # =>
+ # # shape: (5, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f32 │
+ # # ╞═════╡
+ # # │ 3.0 │
+ # # ├╌╌╌╌╌┤
+ # # │ 4.5 │
+ # # ├╌╌╌╌╌┤
+ # # │ 1.5 │
+ # # ├╌╌╌╌╌┤
+ # # │ 1.5 │
+ # # ├╌╌╌╌╌┤
+ # # │ 4.5 │
+ # # └─────┘
+ #
+ # @example The 'ordinal' method:
+ # df = Polars::DataFrame.new({"a" => [3, 6, 1, 1, 6]})
+ # df.select(Polars.col("a").rank(method: "ordinal"))
+ # # =>
+ # # shape: (5, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ u32 │
+ # # ╞═════╡
+ # # │ 3 │
+ # # ├╌╌╌╌╌┤
+ # # │ 4 │
+ # # ├╌╌╌╌╌┤
+ # # │ 1 │
+ # # ├╌╌╌╌╌┤
+ # # │ 2 │
+ # # ├╌╌╌╌╌┤
+ # # │ 5 │
+ # # └─────┘
def rank(method: "average", reverse: false)
  # Delegates to `_rbexpr.rank` with the tie-breaking method and reverse flag,
  # then passes the result through `wrap_expr`.
  rb_ranked = _rbexpr.rank(method, reverse)
  wrap_expr(rb_ranked)
end
+ # Calculate the n-th discrete difference.
+ #
+ # @param n [Integer]
+ # Number of slots to shift.
+ # @param null_behavior ["ignore", "drop"]
+ # How to handle null values.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [20, 10, 30]
+ # }
+ # )
+ # df.select(Polars.col("a").diff)
+ # # =>
+ # # shape: (3, 1)
+ # # ┌──────┐
+ # # │ a │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞══════╡
+ # # │ null │
+ # # ├╌╌╌╌╌╌┤
+ # # │ -10 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 20 │
+ # # └──────┘
def diff(n: 1, null_behavior: "ignore")
  # Delegates to `_rbexpr.diff` with the shift count and null handling mode,
  # then passes the result through `wrap_expr`.
  rb_diff = _rbexpr.diff(n, null_behavior)
  wrap_expr(rb_diff)
end
+ # Computes percentage change between values.
+ #
+ # Percentage change (as fraction) between current element and most-recent
+ # non-null element at least `n` period(s) before the current element.
+ #
+ # Computes the change from the previous row by default.
+ #
+ # @param n [Integer]
+ # Periods to shift for forming percent change.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [10, 11, 12, nil, 12]
+ # }
+ # )
+ # df.with_column(Polars.col("a").pct_change.alias("pct_change"))
+ # # =>
+ # # shape: (5, 2)
+ # # ┌──────┬────────────┐
+ # # │ a ┆ pct_change │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ f64 │
+ # # ╞══════╪════════════╡
+ # # │ 10 ┆ null │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 11 ┆ 0.1 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 12 ┆ 0.090909 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ null ┆ 0.0 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 12 ┆ 0.0 │
+ # # └──────┴────────────┘
def pct_change(n: 1)
  # Delegates to `_rbexpr.pct_change` with the period count and passes the
  # result through `wrap_expr`.
  rb_change = _rbexpr.pct_change(n)
  wrap_expr(rb_change)
end
+ # Compute the sample skewness of a data set.
+ #
+ # For normally distributed data, the skewness should be about zero. For
+ # unimodal continuous distributions, a skewness value greater than zero means
+ # that there is more weight in the right tail of the distribution. The
+ # function `skewtest` can be used to determine if the skewness value
+ # is close enough to zero, statistically speaking.
+ #
+ # @param bias [Boolean]
+ # If false, the calculations are corrected for statistical bias.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
+ # df.select(Polars.col("a").skew)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌──────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ 0.343622 │
+ # # └──────────┘
def skew(bias: true)
  # Delegates to `_rbexpr.skew` with the bias flag and passes the result
  # through `wrap_expr`.
  rb_skew = _rbexpr.skew(bias)
  wrap_expr(rb_skew)
end
+ # Compute the kurtosis (Fisher or Pearson) of a dataset.
+ #
+ # Kurtosis is the fourth central moment divided by the square of the
+ # variance. If Fisher's definition is used, then 3.0 is subtracted from
+ # the result to give 0.0 for a normal distribution.
+ # If `bias` is false then the kurtosis is calculated using k statistics to
+ # eliminate bias coming from biased moment estimators.
+ #
+ # @param fisher [Boolean]
+ # If true, Fisher's definition is used (normal ==> 0.0). If false,
+ # Pearson's definition is used (normal ==> 3.0).
+ # @param bias [Boolean]
+ # If false, the calculations are corrected for statistical bias.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
+ # df.select(Polars.col("a").kurtosis)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌───────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═══════════╡
+ # # │ -1.153061 │
+ # # └───────────┘
def kurtosis(fisher: true, bias: true)
  # Delegates to `_rbexpr.kurtosis` with the Fisher and bias flags, then passes
  # the result through `wrap_expr`.
  rb_kurtosis = _rbexpr.kurtosis(fisher, bias)
  wrap_expr(rb_kurtosis)
end
+ # Clip (limit) the values in an array to a `min` and `max` boundary.
+ #
+ # Only works for numerical types.
+ #
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
+ # expression. See `when` for more information.
+ #
+ # @param min_val [Numeric]
+ # Minimum value.
+ # @param max_val [Numeric]
+ # Maximum value.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
+ # df.with_column(Polars.col("foo").clip(1, 10).alias("foo_clipped"))
+ # # =>
+ # # shape: (4, 2)
+ # # ┌──────┬─────────────┐
+ # # │ foo ┆ foo_clipped │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞══════╪═════════════╡
+ # # │ -50 ┆ 1 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 5 ┆ 5 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ null ┆ null │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 50 ┆ 10 │
+ # # └──────┴─────────────┘
def clip(min_val, max_val)
  # Delegates to `_rbexpr.clip` with both bounds and passes the result through
  # `wrap_expr`.
  rb_clipped = _rbexpr.clip(min_val, max_val)
  wrap_expr(rb_clipped)
end
+ # Clip (limit) the values in an array to a `min` boundary.
+ #
+ # Only works for numerical types.
+ #
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
+ # expression. See `when` for more information.
+ #
+ # @param min_val [Numeric]
+ # Minimum value.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
+ # df.with_column(Polars.col("foo").clip_min(0).alias("foo_clipped"))
+ # # =>
+ # # shape: (4, 2)
+ # # ┌──────┬─────────────┐
+ # # │ foo ┆ foo_clipped │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞══════╪═════════════╡
+ # # │ -50 ┆ 0 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 5 ┆ 5 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ null ┆ null │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 50 ┆ 50 │
+ # # └──────┴─────────────┘
def clip_min(min_val)
  # Delegates to `_rbexpr.clip_min` with the lower bound and passes the result
  # through `wrap_expr`.
  rb_clipped = _rbexpr.clip_min(min_val)
  wrap_expr(rb_clipped)
end
+ # Clip (limit) the values in an array to a `max` boundary.
+ #
+ # Only works for numerical types.
+ #
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
+ # expression. See `when` for more information.
+ #
+ # @param max_val [Numeric]
+ # Maximum value.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => [-50, 5, nil, 50]})
+ # df.with_column(Polars.col("foo").clip_max(0).alias("foo_clipped"))
+ # # =>
+ # # shape: (4, 2)
+ # # ┌──────┬─────────────┐
+ # # │ foo ┆ foo_clipped │
+ # # │ --- ┆ --- │
+ # # │ i64 ┆ i64 │
+ # # ╞══════╪═════════════╡
+ # # │ -50 ┆ -50 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 5 ┆ 0 │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ null ┆ null │
+ # # ├╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 50 ┆ 0 │
+ # # └──────┴─────────────┘
def clip_max(max_val)
  # Delegates to `_rbexpr.clip_max` with the upper bound and passes the result
  # through `wrap_expr`.
  rb_clipped = _rbexpr.clip_max(max_val)
  wrap_expr(rb_clipped)
end
+ # Calculate the lower bound.
+ #
+ # Returns a unit Series with the lowest value possible for the dtype of this
+ # expression.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
+ # df.select(Polars.col("a").lower_bound)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌──────────────────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞══════════════════════╡
+ # # │ -9223372036854775808 │
+ # # └──────────────────────┘
def lower_bound
  # Delegates to `_rbexpr.lower_bound` and passes the result through `wrap_expr`.
  rb_lower = _rbexpr.lower_bound
  wrap_expr(rb_lower)
end
+ # Calculate the upper bound.
+ #
+ # Returns a unit Series with the highest value possible for the dtype of this
+ # expression.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3, 2, 1]})
+ # df.select(Polars.col("a").upper_bound)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────────────────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═════════════════════╡
+ # # │ 9223372036854775807 │
+ # # └─────────────────────┘
def upper_bound
  # Delegates to `_rbexpr.upper_bound` and passes the result through `wrap_expr`.
  rb_upper = _rbexpr.upper_bound
  wrap_expr(rb_upper)
end
+ # Compute the element-wise indication of the sign.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [-9.0, -0.0, 0.0, 4.0, nil]})
+ # df.select(Polars.col("a").sign)
+ # # =>
+ # # shape: (5, 1)
+ # # ┌──────┐
+ # # │ a │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞══════╡
+ # # │ -1 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 0 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ 1 │
+ # # ├╌╌╌╌╌╌┤
+ # # │ null │
+ # # └──────┘
def sign
  # Delegates to `_rbexpr.sign` and passes the result through `wrap_expr`.
  rb_sign = _rbexpr.sign
  wrap_expr(rb_sign)
end
+ # Compute the element-wise value for the sine.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [0.0]})
+ # df.select(Polars.col("a").sin)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ 0.0 │
+ # # └─────┘
def sin
  # Delegates to `_rbexpr.sin` and passes the result through `wrap_expr`.
  rb_sin = _rbexpr.sin
  wrap_expr(rb_sin)
end
+ # Compute the element-wise value for the cosine.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [0.0]})
+ # df.select(Polars.col("a").cos)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ 1.0 │
+ # # └─────┘
def cos
  # Delegates to `_rbexpr.cos` and passes the result through `wrap_expr`.
  rb_cos = _rbexpr.cos
  wrap_expr(rb_cos)
end
+ # Compute the element-wise value for the tangent.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1.0]})
+ # df.select(Polars.col("a").tan)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌──────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ 1.557408 │
+ # # └──────────┘
def tan
  # Delegates to `_rbexpr.tan` and passes the result through `wrap_expr`.
  rb_tan = _rbexpr.tan
  wrap_expr(rb_tan)
end
+ # Compute the element-wise value for the inverse sine.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1.0]})
+ # df.select(Polars.col("a").arcsin)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌──────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ 1.570796 │
+ # # └──────────┘
def arcsin
  # Delegates to `_rbexpr.arcsin` and passes the result through `wrap_expr`.
  rb_arcsin = _rbexpr.arcsin
  wrap_expr(rb_arcsin)
end
+ # Compute the element-wise value for the inverse cosine.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [0.0]})
+ # df.select(Polars.col("a").arccos)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌──────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ 1.570796 │
+ # # └──────────┘
def arccos
  # Delegates to `_rbexpr.arccos` and passes the result through `wrap_expr`.
  rb_arccos = _rbexpr.arccos
  wrap_expr(rb_arccos)
end
+ # Compute the element-wise value for the inverse tangent.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1.0]})
+ # df.select(Polars.col("a").arctan)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌──────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ 0.785398 │
+ # # └──────────┘
def arctan
  # Delegates to `_rbexpr.arctan` and passes the result through `wrap_expr`.
  rb_arctan = _rbexpr.arctan
  wrap_expr(rb_arctan)
end
+ # Compute the element-wise value for the hyperbolic sine.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1.0]})
+ # df.select(Polars.col("a").sinh)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌──────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ 1.175201 │
+ # # └──────────┘
def sinh
  # Delegates to `_rbexpr.sinh` and passes the result through `wrap_expr`.
  rb_sinh = _rbexpr.sinh
  wrap_expr(rb_sinh)
end
+ # Compute the element-wise value for the hyperbolic cosine.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1.0]})
+ # df.select(Polars.col("a").cosh)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌──────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ 1.543081 │
+ # # └──────────┘
def cosh
  # Delegates to `_rbexpr.cosh` and passes the result through `wrap_expr`.
  rb_cosh = _rbexpr.cosh
  wrap_expr(rb_cosh)
end
+ # Compute the element-wise value for the hyperbolic tangent.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1.0]})
+ # df.select(Polars.col("a").tanh)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌──────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ 0.761594 │
+ # # └──────────┘
def tanh
  # Delegates to `_rbexpr.tanh` and passes the result through `wrap_expr`.
  rb_tanh = _rbexpr.tanh
  wrap_expr(rb_tanh)
end
+ # Compute the element-wise value for the inverse hyperbolic sine.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1.0]})
+ # df.select(Polars.col("a").arcsinh)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌──────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ 0.881374 │
+ # # └──────────┘
def arcsinh
  # Delegates to `_rbexpr.arcsinh` and passes the result through `wrap_expr`.
  rb_arcsinh = _rbexpr.arcsinh
  wrap_expr(rb_arcsinh)
end
+ # Compute the element-wise value for the inverse hyperbolic cosine.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1.0]})
+ # df.select(Polars.col("a").arccosh)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ 0.0 │
+ # # └─────┘
def arccosh
  # Delegates to `_rbexpr.arccosh` and passes the result through `wrap_expr`.
  rb_arccosh = _rbexpr.arccosh
  wrap_expr(rb_arccosh)
end
+ # Compute the element-wise value for the inverse hyperbolic tangent.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1.0]})
+ # df.select(Polars.col("a").arctanh)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═════╡
+ # # │ inf │
+ # # └─────┘
def arctanh
  # Delegates to `_rbexpr.arctanh` and passes the result through `wrap_expr`.
  rb_arctanh = _rbexpr.arctanh
  wrap_expr(rb_arctanh)
end
+ # Reshape this Expr to a flat Series or a Series of Lists.
+ #
+ # @param dims [Array]
+ # Array of the dimension sizes. If a -1 is used in any of the dimensions, that
+ # dimension is inferred.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"foo" => [1, 2, 3, 4, 5, 6, 7, 8, 9]})
+ # df.select(Polars.col("foo").reshape([3, 3]))
+ # # =>
+ # # shape: (3, 1)
+ # # ┌───────────┐
+ # # │ foo │
+ # # │ --- │
+ # # │ list[i64] │
+ # # ╞═══════════╡
+ # # │ [1, 2, 3] │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ [4, 5, 6] │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ [7, 8, 9] │
+ # # └───────────┘
def reshape(dims)
  # Delegates to `_rbexpr.reshape` with the requested dimensions and passes the
  # result through `wrap_expr`.
  rb_reshaped = _rbexpr.reshape(dims)
  wrap_expr(rb_reshaped)
end
+ # Shuffle the contents of this expr.
+ #
+ # @param seed [Integer]
+ # Seed for the random number generator. If nil (default), a random seed is
+ # drawn with `rand(10000)`.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
+ # df.select(Polars.col("a").shuffle(seed: 1))
+ # # =>
+ # # shape: (3, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═════╡
+ # # │ 2 │
+ # # ├╌╌╌╌╌┤
+ # # │ 1 │
+ # # ├╌╌╌╌╌┤
+ # # │ 3 │
+ # # └─────┘
def shuffle(seed: nil)
  # A missing seed is replaced by a random integer drawn with `rand(10000)`;
  # an explicit seed is forwarded untouched.
  effective_seed = seed.nil? ? rand(10000) : seed
  rb_shuffled = _rbexpr.shuffle(effective_seed)
  wrap_expr(rb_shuffled)
end
- # def sample
- # end
+ # Sample from this expression.
+ #
+ # @param frac [Float]
+ # Fraction of items to return. Cannot be used with `n`.
+ # @param with_replacement [Boolean]
+ # Allow values to be sampled more than once.
+ # @param shuffle [Boolean]
+ # Shuffle the order of sampled data points.
+ # @param seed [Integer]
+ # Seed for the random number generator. If nil (default), a random
+ # seed is used.
+ # @param n [Integer]
+ # Number of items to return. Cannot be used with `frac`.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
+ # df.select(Polars.col("a").sample(frac: 1.0, with_replacement: true, seed: 1))
+ # # =>
+ # # shape: (3, 1)
+ # # ┌─────┐
+ # # │ a │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞═════╡
+ # # │ 3 │
+ # # ├╌╌╌╌╌┤
+ # # │ 1 │
+ # # ├╌╌╌╌╌┤
+ # # │ 1 │
+ # # └─────┘
+ def sample(
+ frac: nil,
+ with_replacement: true,
+ shuffle: false,
+ seed: nil,
+ n: nil
+ )
# `n` and `frac` are mutually exclusive ways of sizing the sample.
+ if !n.nil? && !frac.nil?
+ raise ArgumentError, "cannot specify both `n` and `frac`"
+ end
- # def ewm_mean
- # end
# Only `n` was given: sample a fixed number of items via `sample_n`.
+ if !n.nil? && frac.nil?
+ return wrap_expr(_rbexpr.sample_n(n, with_replacement, shuffle, seed))
+ end
- # def ewm_std
- # end
# Neither `n` nor `frac` was given: fall back to a fraction of 1.0.
+ if frac.nil?
+ frac = 1.0
+ end
# `seed` is forwarded as given (possibly nil) to `sample_frac`.
+ wrap_expr(
+ _rbexpr.sample_frac(frac, with_replacement, shuffle, seed)
+ )
+ end
- # def ewm_var
- # end
+ # Exponentially-weighted moving average.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
+ # df.select(Polars.col("a").ewm_mean(com: 1))
+ # # =>
+ # # shape: (3, 1)
+ # # ┌──────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ 1.0 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1.666667 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 2.428571 │
+ # # └──────────┘
+ def ewm_mean(
+ com: nil,
+ span: nil,
+ half_life: nil,
+ alpha: nil,
+ adjust: true,
+ min_periods: 1
+ )
# Collapse whichever of `com`, `span`, `half_life`, `alpha` was supplied into
# a single `alpha`; `_prepare_alpha` raises ArgumentError on invalid input.
+ alpha = _prepare_alpha(com, span, half_life, alpha)
+ wrap_expr(_rbexpr.ewm_mean(alpha, adjust, min_periods))
+ end
+ # Exponentially-weighted moving standard deviation.
#
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
+ # df.select(Polars.col("a").ewm_std(com: 1))
+ # # =>
+ # # shape: (3, 1)
+ # # ┌──────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ 0.0 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 0.707107 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 0.963624 │
+ # # └──────────┘
+ def ewm_std(
+ com: nil,
+ span: nil,
+ half_life: nil,
+ alpha: nil,
+ adjust: true,
+ bias: false,
+ min_periods: 1
+ )
# Collapse whichever of `com`, `span`, `half_life`, `alpha` was supplied into
# a single `alpha`; `_prepare_alpha` raises ArgumentError on invalid input.
+ alpha = _prepare_alpha(com, span, half_life, alpha)
+ wrap_expr(_rbexpr.ewm_std(alpha, adjust, bias, min_periods))
+ end
+
+ # Exponentially-weighted moving variance.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
+ # df.select(Polars.col("a").ewm_var(com: 1))
+ # # =>
+ # # shape: (3, 1)
+ # # ┌──────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ 0.0 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 0.5 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 0.928571 │
+ # # └──────────┘
+ def ewm_var(
+ com: nil,
+ span: nil,
+ half_life: nil,
+ alpha: nil,
+ adjust: true,
+ bias: false,
+ min_periods: 1
+ )
# Collapse whichever of `com`, `span`, `half_life`, `alpha` was supplied into
# a single `alpha`; `_prepare_alpha` raises ArgumentError on invalid input.
+ alpha = _prepare_alpha(com, span, half_life, alpha)
+ wrap_expr(_rbexpr.ewm_var(alpha, adjust, bias, min_periods))
+ end
+
+ # Extend the Series with given number of values.
+ #
+ # @param value [Object]
+ # The value to extend the Series with. This value may be nil to fill with
+ # nulls.
+ # @param n [Integer]
+ # The number of values to extend.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3]})
+ # df.select(Polars.col("values").extend_constant(99, 2))
+ # # =>
+ # # shape: (5, 1)
+ # # ┌────────┐
+ # # │ values │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞════════╡
+ # # │ 1 │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ 2 │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ 3 │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ 99 │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ 99 │
+ # # └────────┘
def extend_constant(value, n)
  # Delegates to `_rbexpr.extend_constant` with the fill value and count, then
  # passes the result through `wrap_expr`.
  rb_extended = _rbexpr.extend_constant(value, n)
  wrap_expr(rb_extended)
end
+ # Count all unique values and create a struct mapping value to count.
+ #
+ # @param multithreaded [Boolean]
+ # Better to turn this off in the aggregation context, as it can lead to
+ # contention.
+ # @param sort [Boolean]
+ # Ensure the output is sorted from most values to least.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "id" => ["a", "b", "b", "c", "c", "c"]
+ # }
+ # )
+ # df.select(
+ # [
+ # Polars.col("id").value_counts(sort: true),
+ # ]
+ # )
+ # # =>
+ # # shape: (3, 1)
+ # # ┌───────────┐
+ # # │ id │
+ # # │ --- │
+ # # │ struct[2] │
+ # # ╞═══════════╡
+ # # │ {"c",3} │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ {"b",2} │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌┤
+ # # │ {"a",1} │
+ # # └───────────┘
def value_counts(multithreaded: false, sort: false)
  # Delegates to `_rbexpr.value_counts` with both flags and passes the result
  # through `wrap_expr`.
  rb_counts = _rbexpr.value_counts(multithreaded, sort)
  wrap_expr(rb_counts)
end
+ # Return a count of the unique values in the order of appearance.
+ #
+ # This method differs from `value_counts` in that it does not return the
+ # values, only the counts, and might be faster.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "id" => ["a", "b", "b", "c", "c", "c"]
+ # }
+ # )
+ # df.select(
+ # [
+ # Polars.col("id").unique_counts
+ # ]
+ # )
+ # # =>
+ # # shape: (3, 1)
+ # # ┌─────┐
+ # # │ id │
+ # # │ --- │
+ # # │ u32 │
+ # # ╞═════╡
+ # # │ 1 │
+ # # ├╌╌╌╌╌┤
+ # # │ 2 │
+ # # ├╌╌╌╌╌┤
+ # # │ 3 │
+ # # └─────┘
def unique_counts
  # Delegates to `_rbexpr.unique_counts` and passes the result through `wrap_expr`.
  rb_counts = _rbexpr.unique_counts
  wrap_expr(rb_counts)
end
+ # Compute the logarithm to a given base.
+ #
+ # @param base [Float]
+ # Given base, defaults to `e`.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
+ # df.select(Polars.col("a").log(2))
+ # # =>
+ # # shape: (3, 1)
+ # # ┌──────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ 0.0 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1.0 │
+ # # ├╌╌╌╌╌╌╌╌╌╌┤
+ # # │ 1.584963 │
+ # # └──────────┘
def log(base = Math::E)
  # Delegates to `_rbexpr.log` with the base (Math::E when omitted) and passes
  # the result through `wrap_expr`.
  rb_log = _rbexpr.log(base)
  wrap_expr(rb_log)
end
- def entropy(base: 2, normalize: false)
+ # Computes the entropy.
+ #
+ # Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
+ #
+ # @param base [Float]
+ # Given base, defaults to `2`.
+ # @param normalize [Boolean]
+ # Normalize pk if it doesn't sum to 1.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new({"a" => [1, 2, 3]})
+ # df.select(Polars.col("a").entropy(base: 2))
+ # # =>
+ # # shape: (1, 1)
+ # # ┌──────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞══════════╡
+ # # │ 1.459148 │
+ # # └──────────┘
+ #
+ # @example
+ # df.select(Polars.col("a").entropy(base: 2, normalize: false))
+ # # =>
+ # # shape: (1, 1)
+ # # ┌───────────┐
+ # # │ a │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞═══════════╡
+ # # │ -6.754888 │
+ # # └───────────┘
+ def entropy(base: 2, normalize: true)
# Passes `base` and `normalize` positionally to `_rbexpr.entropy` and wraps
# the result with `wrap_expr`.
wrap_expr(_rbexpr.entropy(base, normalize))
end
- # def cumulative_eval
- # end
+ # Run an expression over a sliding window that increases `1` slot every iteration.
+ #
+ # @param expr [Expr]
+ # Expression to evaluate
+ # @param min_periods [Integer]
+ # Number of valid values there should be in the window before the expression
+ # is evaluated. valid values = `length - null_count`
+ # @param parallel [Boolean]
+ # Run in parallel. Don't do this in a groupby or another operation that
+ # already has much parallelization.
+ #
+ # @return [Expr]
+ #
+ # @note
+ # This functionality is experimental and may change without it being considered a
+ # breaking change.
+ #
+ # @note
+ # This can be really slow as it can have `O(n^2)` complexity. Don't use this
+ # for operations that visit all elements.
+ #
+ # @example
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3, 4, 5]})
+ # df.select(
+ # [
+ # Polars.col("values").cumulative_eval(
+ # Polars.element.first - Polars.element.last ** 2
+ # )
+ # ]
+ # )
+ # # =>
+ # # shape: (5, 1)
+ # # ┌────────┐
+ # # │ values │
+ # # │ --- │
+ # # │ f64 │
+ # # ╞════════╡
+ # # │ 0.0 │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ -3.0 │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ -8.0 │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ -15.0 │
+ # # ├╌╌╌╌╌╌╌╌┤
+ # # │ -24.0 │
+ # # └────────┘
+ def cumulative_eval(expr, min_periods: 1, parallel: false)
# `expr` is unwrapped to its own `_rbexpr` before being passed down; the
# result is re-wrapped with `wrap_expr`.
+ wrap_expr(
+ _rbexpr.cumulative_eval(expr._rbexpr, min_periods, parallel)
+ )
+ end
- # def set_sorted
+ # Flags the expression as 'sorted'.
+ #
+ # Enables downstream code to use fast paths for sorted arrays.
+ #
+ # @param reverse [Boolean]
+ # If the `Series` order is reversed, e.g. descending.
+ #
+ # @return [Expr]
+ #
+ # @note
+ # This can lead to incorrect results if this `Series` is not sorted!!
+ # Use with care!
+ #
+ # @example
+ # df = Polars::DataFrame.new({"values" => [1, 2, 3]})
+ # df.select(Polars.col("values").set_sorted.max)
+ # # =>
+ # # shape: (1, 1)
+ # # ┌────────┐
+ # # │ values │
+ # # │ --- │
+ # # │ i64 │
+ # # ╞════════╡
+ # # │ 3 │
+ # # └────────┘
+ # def set_sorted(reverse: false)
+ # map { |s| s.set_sorted(reverse) }
# end
+ # Aggregate to list.
#
+ # @return [Expr]
+ #
+ # @example
+ # df = Polars::DataFrame.new(
+ # {
+ # "a" => [1, 2, 3],
+ # "b" => [4, 5, 6]
+ # }
+ # )
+ # df.select(Polars.all.list)
+ # # =>
+ # # shape: (1, 2)
+ # # ┌───────────┬───────────┐
+ # # │ a ┆ b │
+ # # │ --- ┆ --- │
+ # # │ list[i64] ┆ list[i64] │
+ # # ╞═══════════╪═══════════╡
+ # # │ [1, 2, 3] ┆ [4, 5, 6] │
+ # # └───────────┴───────────┘
def list
  # Delegates to `_rbexpr.list` and passes the result through `wrap_expr`.
  rb_list = _rbexpr.list
  wrap_expr(rb_list)
end
+ # Shrink numeric columns to the minimal required datatype.
+ #
+ # Shrink to the dtype needed to fit the extrema of this `Series`.
+ # This can be used to reduce memory pressure.
+ #
+ # @return [Expr]
+ #
+ # @example
+ # Polars::DataFrame.new(
+ # {
+ # "a" => [1, 2, 3],
+ # "b" => [1, 2, 2 << 32],
+ # "c" => [-1, 2, 1 << 30],
+ # "d" => [-112, 2, 112],
+ # "e" => [-112, 2, 129],
+ # "f" => ["a", "b", "c"],
+ # "g" => [0.1, 1.32, 0.12],
+ # "h" => [true, nil, false]
+ # }
+ # ).select(Polars.all.shrink_dtype)
+ # # =>
+ # # shape: (3, 8)
+ # # ┌─────┬────────────┬────────────┬──────┬──────┬─────┬──────┬───────┐
+ # # │ a ┆ b ┆ c ┆ d ┆ e ┆ f ┆ g ┆ h │
+ # # │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+ # # │ i8 ┆ i64 ┆ i32 ┆ i8 ┆ i16 ┆ str ┆ f32 ┆ bool │
+ # # ╞═════╪════════════╪════════════╪══════╪══════╪═════╪══════╪═══════╡
+ # # │ 1 ┆ 1 ┆ -1 ┆ -112 ┆ -112 ┆ a ┆ 0.1 ┆ true │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+ # # │ 2 ┆ 2 ┆ 2 ┆ 2 ┆ 2 ┆ b ┆ 1.32 ┆ null │
+ # # ├╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+ # # │ 3 ┆ 8589934592 ┆ 1073741824 ┆ 112 ┆ 129 ┆ c ┆ 0.12 ┆ false │
+ # # └─────┴────────────┴────────────┴──────┴──────┴─────┴──────┴───────┘
def shrink_dtype
  # Delegates to `_rbexpr.shrink_dtype` and passes the result through `wrap_expr`.
  rb_shrunk = _rbexpr.shrink_dtype
  wrap_expr(rb_shrunk)
end
+ # Create an object namespace of all list related methods.
+ #
+ # @return [ListExpr]
def arr
  # Builds a ListExpr around this expression.
  owner = self
  ListExpr.new(owner)
end
+ # Create an object namespace of all categorical related methods.
+ #
+ # @return [CatExpr]
def cat
  # Builds a CatExpr around this expression.
  owner = self
  CatExpr.new(owner)
end
+ # Create an object namespace of all datetime related methods.
+ #
+ # @return [DateTimeExpr]
def dt
  # Builds a DateTimeExpr around this expression.
  owner = self
  DateTimeExpr.new(owner)
end
+ # Create an object namespace of all meta related expression methods.
+ #
+ # @return [MetaExpr]
def meta
  # Builds a MetaExpr around this expression.
  owner = self
  MetaExpr.new(owner)
end
+ # Create an object namespace of all string related methods.
+ #
+ # @return [StringExpr]
def str
  # Builds a StringExpr around this expression.
  owner = self
  StringExpr.new(owner)
end
+ # Create an object namespace of all struct related methods.
+ #
+ # @return [StructExpr]
def struct
  # Builds a StructExpr around this expression.
  owner = self
  StructExpr.new(owner)
end
private
@@ -1334,8 +5004,54 @@
_to_expr(other)._rbexpr
end
# Coerce `other` into an Expr: an Expr instance is returned unchanged, anything
# else is passed through `Utils.lit`.
def _to_expr(other)
other.is_a?(Expr) ? other : Utils.lit(other)
+ end
+
# Resolve the smoothing factor `alpha` from exactly one of the four decay
# parameters and return it.
#
# Raises ArgumentError when more than one parameter is given, when none is
# given, or when the supplied value is outside its allowed range.
+ def _prepare_alpha(com, span, half_life, alpha)
# At most one of the four may be non-nil.
+ if [com, span, half_life, alpha].count { |v| !v.nil? } > 1
+ raise ArgumentError, "Parameters 'com', 'span', 'half_life', and 'alpha' are mutually exclusive"
+ end
+
# com: alpha = 1 / (1 + com), requires com >= 0.
+ if !com.nil?
+ if com < 0.0
+ raise ArgumentError, "Require 'com' >= 0 (found #{com})"
+ end
+ alpha = 1.0 / (1.0 + com)
+
# span: alpha = 2 / (span + 1), requires span >= 1.
+ elsif !span.nil?
+ if span < 1.0
+ raise ArgumentError, "Require 'span' >= 1 (found #{span})"
+ end
+ alpha = 2.0 / (span + 1.0)
+
# half_life: alpha = 1 - exp(-ln(2) / half_life), requires half_life > 0.
+ elsif !half_life.nil?
+ if half_life <= 0.0
+ raise ArgumentError, "Require 'half_life' > 0 (found #{half_life})"
+ end
+ alpha = 1.0 - Math.exp(-Math.log(2.0) / half_life)
+
# Nothing was supplied at all.
+ elsif alpha.nil?
+ raise ArgumentError, "One of 'com', 'span', 'half_life', or 'alpha' must be set"
+
# alpha given directly: only validated, must satisfy 0 < alpha <= 1.
+ elsif alpha <= 0 || alpha > 1
+ raise ArgumentError, "Require 0 < 'alpha' <= 1 (found #{alpha})"
+ end
+
+ alpha
+ end
+
# Normalize rolling-window arguments and return them as
# `[window_size, min_periods]`.
#
# An Integer `window_size` is rewritten to the string "<n>i" and, when
# `min_periods` is nil, `min_periods` takes the integer window size. For any
# other `window_size`, a nil `min_periods` becomes 1.
+ def _prepare_rolling_window_args(window_size, min_periods)
+ if window_size.is_a?(Integer)
+ if min_periods.nil?
+ min_periods = window_size
+ end
# NOTE(review): the "i" suffix presumably marks an index-count window for the
# native layer; confirm against the `_rbexpr` implementation.
+ window_size = "#{window_size}i"
+ end
+ if min_periods.nil?
+ min_periods = 1
+ end
+ [window_size, min_periods]
end
end
end