lib/polars/series.rb in polars-df-0.1.0 vs lib/polars/series.rb in polars-df-0.1.1

- old
+ new

@@ -43,18 +43,32 @@ def dtype _s.dtype.to_sym end + def flags + { + "SORTED_ASC" => _s.is_sorted_flag, + "SORTED_DESC" => _s.is_sorted_reverse_flag + } + end + + def inner_dtype + _s.inner_dtype&.to_sym + end + def name _s.name end def shape [_s.len] end + # def time_unit + # end + def to_s _s.to_s end alias_method :inspect, :to_s @@ -68,10 +82,28 @@ def ^(other) Utils.wrap_s(_s.bitxor(other._s)) end + # def ==(other) + # end + + # def !=(other) + # end + + # def >(other) + # end + + # def <(other) + # end + + # def >=(other) + # end + + # def <=(other) + # end + def +(other) Utils. wrap_s(_s.add(other._s)) end def -(other) @@ -84,30 +116,133 @@ def /(other) Utils.wrap_s(_s.div(other._s)) end + def **(power) + # if is_datelike + # raise ArgumentError, "first cast to integer before raising datelike dtypes to a power" + # end + to_frame.select(Polars.col(name).pow(power)).to_series + end + + # def -@(other) + # end + + def [](item) + _s.get_idx(item) + end + + # def []=(key, value) + # end + + def estimated_size(unit = "b") + sz = _s.estimated_size + Utils.scale_bytes(sz, to: unit) + end + + def sqrt + self ** 0.5 + end + + def any + to_frame.select(Polars.col(name).any).to_series[0] + end + + def all + to_frame.select(Polars.col(name).all).to_series[0] + end + + # def log + # end + + # def log10 + # end + + # def exp + # end + + # def drop_nulls + # end + + # def drop_nans + # end + def to_frame Utils.wrap_df(RbDataFrame.new([_s])) end + # def describe + # end + def sum _s.sum end def mean _s.mean end + def product + to_frame.select(Polars.col(name).product).to_series[0] + end + def min _s.min end def max _s.max end + # def nan_max + # end + + # def nan_min + # end + + def std(ddof: 1) + if !is_numeric + nil + else + to_frame.select(Polars.col(name).std(ddof: ddof)).to_series[0] + end + end + + def var(ddof: 1) + if !is_numeric + nil + else + to_frame.select(Polars.col(name).var(ddof: ddof)).to_series[0] + end + end + + def median + _s.median + end + + def quantile(quantile, interpolation: "nearest") + _s.quantile(quantile, interpolation) + end + + def to_dummies + Utils.wrap_df(_s.to_dummies) + end + + def value_counts(sort: false) + Utils.wrap_df(_s.value_counts(sort)) + end + + # def unique_counts + # end + + # def entropy + # end + + # def cumulative_eval + # end + def alias(name) s = dup s._s.rename(name) s end @@ -139,12 +274,16 @@ def cummax(reverse: false) Utils.wrap_s(_s.cummax(reverse)) end + def cumprod(reverse: false) + Utils.wrap_s(_s.cumprod(reverse)) + end + def limit(n = 10) - to_frame().select(Utils.col(name).limit(n)).to_series + to_frame.select(Utils.col(name).limit(n)).to_series end def slice(offset, length = nil) length = len if length.nil? Utils.wrap_s(_s.slice(offset, length)) @@ -165,34 +304,376 @@ def tail(n = 10) to_frame.select(Utils.col(name).tail(n)).to_series end + # def take_every + # end + def sort(reverse: false, in_place: false) if in_place self._s = _s.sort(reverse) self else Utils.wrap_s(_s.sort(reverse)) end end - def to_a - _s.to_a + # def top_k + # end + + # def arg_sort + # end + + # def argsort + # end + + # def arg_unique + # end + + def arg_min + _s.arg_min end + def arg_max + _s.arg_max + end + + # def search_sorted + # end + + # def unique + # end + + # def take + # end + + def null_count + _s.null_count + end + + def has_validity + _s.has_validity + end + + def is_empty + len == 0 + end + alias_method :empty?, :is_empty + + # def is_null + # end + + # def is_not_null + # end + + # def is_finite + # end + + # def is_infinite + # end + + # def is_nan + # end + + # def is_not_nan + # end + + # def is_in + # end + + # def arg_true + # end + + # def is_unique + # end + + # def is_first + # end + + # def is_duplicated + # end + + # def explode + # end + + def series_equal(other, null_equal: false, strict: false) + _s.series_equal(other._s, null_equal, strict) + end + def len _s.len end + # def cast + # end + + # def to_physical + # end + + def to_a + _s.to_a + end + def rechunk(in_place: false) opt_s = _s.rechunk(in_place) in_place ? self : Utils.wrap_s(opt_s) end + # def reverse + # end + + def is_numeric + [:i8, :i16, :i32, :i64, :u8, :u16, :u32, :u64, :f32, :f64].include?(dtype) + end + alias_method :numeric?, :is_numeric + + # def is_datelike + # end + + def is_float + [:f32, :f64].include?(dtype) + end + alias_method :float?, :is_float + + def is_bool + dtype == :bool + end + alias_method :bool?, :is_bool + + def is_utf8 + dtype == :str + end + alias_method :utf8?, :is_utf8 + + # def view + # end + + # def to_numo + # end + + # def set + # end + + # def set_at_idx + # end + + # def cleared + # end + + # clone handled by initialize_copy + + # def fill_nan + # end + + # def fill_null + # end + + def floor + Utils.wrap_s(_s.floor) + end + + def ceil + Utils.wrap_s(_s.ceil) + end + + # default to 0 like Ruby + def round(decimals = 0) + Utils.wrap_s(_s.round(decimals)) + end + + # def dot + # end + + # def mode + # end + + # def sign + # end + + # def sin + # end + + # def cos + # end + + # def tan + # end + + # def arcsin + # end + + # def arccos + # end + + # def arctan + # end + + # def arcsinh + # end + + # def arccosh + # end + + # def arctanh + # end + + # def sinh + # end + + # def cosh + # end + + # def tanh + # end + + # def apply + # end + + # def shift + # end + + # def shift_and_fill + # end + + # def zip_with + # end + + # def rolling_min + # end + + # def rolling_max + # end + + # def rolling_mean + # end + + # def rolling_sum + # end + + # def rolling_std + # end + + # def rolling_var + # end + + # def rolling_apply + # end + + # def rolling_median + # end + + # def rolling_quantile + # end + + # def rolling_skew + # end + + # def sample + # end + + def peak_max + Utils.wrap_s(_s.peak_max) + end + + def peak_min + Utils.wrap_s(_s.peak_min) + end + + def n_unique + _s.n_unique + end + + # def shrink_to_fit + # end + + # def _hash + # end + + # def reinterpret + # end + + # def interpolate + # end + + # def abs + # end + + # def rank + # end + + # def diff + # end + + # def pct_change + # end + + # def skew + # end + + # def kurtosis + # end + + # def clip + # end + + # def clip_min + # end + + # def clip_max + # end + + # def reshape + # end + + # def shuffle + # end + + # def ewm_mean + # end + + # def ewm_std + # end + + # def ewm_var + # end + + # def extend_constant + # end + + def set_sorted(reverse: false) + Utils.wrap_s(_s.set_sorted(reverse)) + end + + # def new_from_index + # end + + # def shrink_dtype + # end + + # def arr + # end + + # def cat + # end + + # def dt + # end + + # def str + # end + + # def struct + # end + private + def initialize_copy(other) + super + self._s = _s._clone + end + def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil) ruby_dtype = nil if (values.nil? || values.empty?) && dtype.nil? if dtype_if_empty @@ -203,21 +684,46 @@ # default to Float32 type dtype = "f32" end end + rb_temporal_types = [] + rb_temporal_types << Date if defined?(Date) + rb_temporal_types << DateTime if defined?(DateTime) + rb_temporal_types << Time if defined?(Time) + # _get_first_non_none value = values.find { |v| !v.nil? } - if !dtype.nil? && is_polars_dtype(dtype) && ruby_dtype.nil? + if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil? constructor = polars_type_to_constructor(dtype) rbseries = constructor.call(name, values, strict) return rbseries - end + else + if ruby_dtype.nil? + if value.nil? + # generic default dtype + ruby_dtype = Float + else + ruby_dtype = value.class + end + end - constructor = rb_type_to_constructor(value.class) - constructor.call(name, values, strict) + # temporal branch + if rb_temporal_types.include?(ruby_dtype) + # if dtype.nil? + # dtype = rb_type_to_dtype(ruby_dtype) + # elsif rb_temporal_types.include?(dtype) + # dtype = rb_type_to_dtype(dtype) + # end + + raise "todo" + else + constructor = rb_type_to_constructor(value.class) + constructor.call(name, values, strict) + end + end end POLARS_TYPE_TO_CONSTRUCTOR = { f32: RbSeries.method(:new_opt_f32), f64: RbSeries.method(:new_opt_f64), @@ -250,12 +756,8 @@ def rb_type_to_constructor(dtype) RB_TYPE_TO_CONSTRUCTOR.fetch(dtype) rescue KeyError # RbSeries.method(:new_object) raise ArgumentError, "Cannot determine type" - end - - def is_polars_dtype(data_type) - true end end end