lib/polars/expr.rb in polars-df-0.1.0 vs lib/polars/expr.rb in polars-df-0.1.1

- old
+ new

@@ -23,14 +23,34 @@ def |(other) wrap_expr(_rbexpr._or(_to_rbexpr(other))) end + def +(other) + wrap_expr(_rbexpr + _to_rbexpr(other)) + end + + def -(other) + wrap_expr(_rbexpr - _to_rbexpr(other)) + end + def *(other) wrap_expr(_rbexpr * _to_rbexpr(other)) end + def /(other) + wrap_expr(_rbexpr / _to_rbexpr(other)) + end + + def %(other) + wrap_expr(_rbexpr % _to_rbexpr(other)) + end + + def **(power) + pow(power) + end + def >=(other) wrap_expr(_rbexpr.gt_eq(_to_expr(other)._rbexpr)) end def <=(other) @@ -51,18 +71,77 @@ def >(other) wrap_expr(_rbexpr.gt(_to_expr(other)._rbexpr)) end + def -@ + Utils.lit(0) - self + end + + # def to_physical + # end + + def any + wrap_expr(_rbexpr.any) + end + + def all + wrap_expr(_rbexpr.all) + end + + def sqrt + self ** 0.5 + end + + def log10 + log(10) + end + + def exp + wrap_expr(_rbexpr.exp) + end + def alias(name) wrap_expr(_rbexpr._alias(name)) end + # TODO support symbols + def exclude(columns) + if columns.is_a?(String) + columns = [columns] + return wrap_expr(_rbexpr.exclude(columns)) + elsif !columns.is_a?(Array) + columns = [columns] + return wrap_expr(_rbexpr.exclude_dtype(columns)) + end + + if !columns.all? { |a| a.is_a?(String) } || !columns.all? { |a| Utils.is_polars_dtype(a) } + raise ArgumentError, "input should be all string or all DataType" + end + + if columns[0].is_a?(String) + wrap_expr(_rbexpr.exclude(columns)) + else + wrap_expr(_rbexpr.exclude_dtype(columns)) + end + end + + def keep_name + wrap_expr(_rbexpr.keep_name) + end + + def prefix(prefix) + wrap_expr(_rbexpr.prefix(prefix)) + end + def suffix(suffix) wrap_expr(_rbexpr.suffix(suffix)) end + # def map_alias + # end + def is_not wrap_expr(_rbexpr.is_not) end def is_null @@ -71,22 +150,136 @@ def is_not_null wrap_expr(_rbexpr.is_not_null) end + def is_finite + wrap_expr(_rbexpr.is_finite) + end + + def is_infinite + wrap_expr(_rbexpr.is_infinite) + end + + def is_nan + wrap_expr(_rbexpr.is_nan) + end + + def is_not_nan + wrap_expr(_rbexpr.is_not_nan) + end + + def agg_groups + wrap_expr(_rbexpr.agg_groups) + end + def count wrap_expr(_rbexpr.count) end def len count end + def slice(offset, length = nil) + if !offset.is_a?(Expr) + offset = Polars.lit(offset) + end + if !length.is_a?(Expr) + length = Polars.lit(length) + end + wrap_expr(_rbexpr.slice(offset._rbexpr, length._rbexpr)) + end + + def append(other, upcast: true) + other = Utils.expr_to_lit_or_expr(other) + wrap_expr(_rbexpr.append(other._rbexpr, upcast)) + end + + def rechunk + wrap_expr(_rbexpr.rechunk) + end + + def drop_nulls + wrap_expr(_rbexpr.drop_nulls) + end + + def drop_nans + wrap_expr(_rbexpr.drop_nans) + end + + def cumsum(reverse: false) + wrap_expr(_rbexpr.cumsum(reverse)) + end + + def cumprod(reverse: false) + wrap_expr(_rbexpr.cumprod(reverse)) + end + + def cummin(reverse: false) + wrap_expr(_rbexpr.cummin(reverse)) + end + + def cummax(reverse: false) + wrap_expr(_rbexpr.cummax(reverse)) + end + + def cumcount(reverse: false) + wrap_expr(_rbexpr.cumcount(reverse)) + end + + def floor + wrap_expr(_rbexpr.floor) + end + + def ceil + wrap_expr(_rbexpr.ceil) + end + + def round(decimals = 0) + wrap_expr(_rbexpr.round(decimals)) + end + + def dot(other) + other = Utils.expr_to_lit_or_expr(other, str_to_lit: false) + wrap_expr(_rbexpr.dot(other._rbexpr)) + end + + def mode + wrap_expr(_rbexpr.mode) + end + + def cast(dtype, strict: true) + dtype = Utils.rb_type_to_dtype(dtype) + wrap_expr(_rbexpr.cast(dtype, strict)) + end + def sort(reverse: false, nulls_last: false) wrap_expr(_rbexpr.sort_with(reverse, nulls_last)) end + def top_k(k: 5, reverse: false) + wrap_expr(_rbexpr.top_k(k, reverse)) + end + + def arg_sort(reverse: false, nulls_last: false) + wrap_expr(_rbexpr.arg_sort(reverse, nulls_last)) + end + + def arg_max + wrap_expr(_rbexpr.arg_max) + end + + def arg_min + wrap_expr(_rbexpr.arg_min) + end + + def search_sorted(element) + element = Utils.expr_to_lit_or_expr(element, str_to_lit: false) + wrap_expr(_rbexpr.search_sorted(element._rbexpr)) + end + def sort_by(by, reverse: false) if !by.is_a?(Array) by = [by] end if !reverse.is_a?(Array) @@ -95,10 +288,22 @@ by = Utils.selection_to_rbexpr_list(by) wrap_expr(_rbexpr.sort_by(by, reverse)) end + # def take + # end + + def shift(periods) + wrap_expr(_rbexpr.shift(periods)) + end + + def shift_and_fill(periods, fill_value) + fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true) + wrap_expr(_rbexpr.shift_and_fill(periods, fill_value._rbexpr)) + end + def fill_null(value = nil, strategy: nil, limit: nil) if !value.nil? && !strategy.nil? raise ArgumentError, "cannot specify both 'value' and 'strategy'." elsif value.nil? && strategy.nil? raise ArgumentError, "must specify either a fill 'value' or 'strategy'" @@ -117,10 +322,18 @@ def fill_nan(fill_value) fill_value = Utils.expr_to_lit_or_expr(fill_value, str_to_lit: true) wrap_expr(_rbexpr.fill_nan(fill_value._rbexpr)) end + def forward_fill(limit: nil) + wrap_expr(_rbexpr.forward_fill(limit)) + end + + def backward_fill(limit: nil) + wrap_expr(_rbexpr.backward_fill(limit)) + end + def reverse wrap_expr(_rbexpr.reverse) end def std(ddof: 1) @@ -165,10 +378,18 @@ def n_unique wrap_expr(_rbexpr.n_unique) end + def null_count + wrap_expr(_rbexpr.null_count) + end + + def arg_unique + wrap_expr(_rbexpr.arg_unique) + end + def unique(maintain_order: false) if maintain_order wrap_expr(_rbexpr.unique_stable) else wrap_expr(_rbexpr.unique) @@ -186,14 +407,52 @@ def over(expr) rbexprs = Utils.selection_to_rbexpr_list(expr) wrap_expr(_rbexpr.over(rbexprs)) end + def is_unique + wrap_expr(_rbexpr.is_unique) + end + + def is_first + wrap_expr(_rbexpr.is_first) + end + + def is_duplicated + wrap_expr(_rbexpr.is_duplicated) + end + + def quantile(quantile, interpolation: "nearest") + wrap_expr(_rbexpr.quantile(quantile, interpolation)) + end + def filter(predicate) wrap_expr(_rbexpr.filter(predicate._rbexpr)) end + def where(predicate) + filter(predicate) + end + + # def map + # end + + # def apply + # end + + def flatten + wrap_expr(_rbexpr.explode) + end + + def explode + wrap_expr(_rbexpr.explode) + end + + def take_every(n) + wrap_expr(_rbexpr.take_every(n)) + end + def head(n = 10) wrap_expr(_rbexpr.head(n)) end def tail(n = 10) @@ -202,19 +461,248 @@ def limit(n = 10) head(n) end + def pow(exponent) + exponent = Utils.expr_to_lit_or_expr(exponent) + wrap_expr(_rbexpr.pow(exponent._rbexpr)) + end + + # def is_in + # end + + def repeat_by(by) + by = Utils.expr_to_lit_or_expr(by, false) + wrap_expr(_rbexpr.repeat_by(by._rbexpr)) + end + + # def is_between + # end + + # def _hash + # end + + def reinterpret(signed: false) + wrap_expr(_rbexpr.reinterpret(signed)) + end + + # def _inspect + # end + def interpolate wrap_expr(_rbexpr.interpolate) end + # def rolling_min + # end + + # def rolling_max + # end + + # def rolling_mean + # end + + # def rolling_sum + # end + + # def rolling_std + # end + + # def rolling_var + # end + + # def rolling_median + # end + + # def rolling_quantile + # end + + # def rolling_apply + # end + + def rolling_skew(window_size, bias: true) + wrap_expr(_rbexpr.rolling_skew(window_size, bias)) + end + + def abs + wrap_expr(_rbexpr.abs) + end + + def argsort(reverse: false, nulls_last: false) + arg_sort(reverse: reverse, nulls_last: nulls_last) + end + + def rank(method: "average", reverse: false) + wrap_expr(_rbexpr.rank(method, reverse)) + end + + def diff(n: 1, null_behavior: "ignore") + wrap_expr(_rbexpr.diff(n, null_behavior)) + end + + def pct_change(n: 1) + wrap_expr(_rbexpr.pct_change(n)) + end + + def skew(bias: true) + wrap_expr(_rbexpr.skew(bias)) + end + + def kurtosis(fisher: true, bias: true) + wrap_expr(_rbexpr.kurtosis(fisher, bias)) + end + + def clip(min_val, max_val) + wrap_expr(_rbexpr.clip(min_val, max_val)) + end + + def clip_min(min_val) + wrap_expr(_rbexpr.clip_min(min_val)) + end + + def clip_max(max_val) + wrap_expr(_rbexpr.clip_max(max_val)) + end + + def lower_bound + wrap_expr(_rbexpr.lower_bound) + end + + def upper_bound + wrap_expr(_rbexpr.upper_bound) + end + + def sign + wrap_expr(_rbexpr.sign) + end + + def sin + wrap_expr(_rbexpr.sin) + end + + def cos + wrap_expr(_rbexpr.cos) + end + + def tan + wrap_expr(_rbexpr.tan) + end + + def arcsin + wrap_expr(_rbexpr.arcsin) + end + + def arccos + wrap_expr(_rbexpr.arccos) + end + + def arctan + wrap_expr(_rbexpr.arctan) + end + + def sinh + wrap_expr(_rbexpr.sinh) + end + + def cosh + wrap_expr(_rbexpr.cosh) + end + + def tanh + wrap_expr(_rbexpr.tanh) + end + + def arcsinh + wrap_expr(_rbexpr.arcsinh) + end + + def arccosh + wrap_expr(_rbexpr.arccosh) + end + + def arctanh + wrap_expr(_rbexpr.arctanh) + end + + def reshape(dims) + wrap_expr(_rbexpr.reshape(dims)) + end + + def shuffle(seed: nil) + if seed.nil? + seed = rand(10000) + end + wrap_expr(_rbexpr.shuffle(seed)) + end + + # def sample + # end + + # def ewm_mean + # end + + # def ewm_std + # end + + # def ewm_var + # end + + # def extend_constant + # end + + def value_counts(multithreaded: false, sort: false) + wrap_expr(_rbexpr.value_counts(multithreaded, sort)) + end + + def unique_counts + wrap_expr(_rbexpr.unique_counts) + end + + def log(base = Math::E) + wrap_expr(self._rbexpr.log(base)) + end + + def entropy(base: 2, normalize: false) + wrap_expr(_rbexpr.entropy(base, normalize)) + end + + # def cumulative_eval + # end + + # def set_sorted + # end + def list wrap_expr(_rbexpr.list) end + def shrink_dtype + wrap_expr(_rbexpr.shrink_dtype) + end + + def arr + ListExpr.new(self) + end + + def cat + CatExpr.new(self) + end + + def dt + DateTimeExpr.new(self) + end + + def meta + MetaExpr.new(self) + end + def str StringExpr.new(self) + end + + def struct + StructExpr.new(self) end private def wrap_expr(expr)