lib/polars/series.rb in polars-df-0.1.2 vs lib/polars/series.rb in polars-df-0.1.3

- old
+ new

@@ -1,8 +1,10 @@ module Polars # A Series represents a single column in a polars DataFrame. class Series + include ExprDispatch + # @private attr_accessor :_s # Create a new Series. # @@ -46,10 +48,12 @@ name = "" if name.nil? if values.nil? self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty) + elsif values.is_a?(Series) + self._s = series_to_rbseries(name, values) elsif values.is_a?(Range) self._s = Polars.arange( values.first, values.last + (values.exclude_end? ? 0 : 1), @@ -128,96 +132,184 @@ # Bitwise AND. # # @return [Series] def &(other) + if !other.is_a?(Series) + other = Series.new([other]) + end Utils.wrap_s(_s.bitand(other._s)) end # Bitwise OR. # # @return [Series] def |(other) + if !other.is_a?(Series) + other = Series.new([other]) + end Utils.wrap_s(_s.bitor(other._s)) end # Bitwise XOR. # # @return [Series] def ^(other) + if !other.is_a?(Series) + other = Series.new([other]) + end Utils.wrap_s(_s.bitxor(other._s)) end - # def ==(other) - # end + # Equal. + # + # @return [Series] + def ==(other) + _comp(other, :eq) + end - # def !=(other) - # end + # Not equal. + # + # @return [Series] + def !=(other) + _comp(other, :neq) + end - # def >(other) - # end + # Greater than. + # + # @return [Series] + def >(other) + _comp(other, :gt) + end - # def <(other) - # end + # Less than. + # + # @return [Series] + def <(other) + _comp(other, :lt) + end - # def >=(other) - # end + # Greater than or equal. + # + # @return [Series] + def >=(other) + _comp(other, :gt_eq) + end - # def <=(other) - # end + # Less than or equal. + # + # @return [Series] + def <=(other) + _comp(other, :lt_eq) + end # Performs addition. # # @return [Series] def +(other) - Utils. wrap_s(_s.add(other._s)) + _arithmetic(other, :add) end # Performs subtraction. # # @return [Series] def -(other) - Utils.wrap_s(_s.sub(other._s)) + _arithmetic(other, :sub) end # Performs multiplication. 
# # @return [Series] def *(other) - Utils.wrap_s(_s.mul(other._s)) + _arithmetic(other, :mul) end # Performs division. # # @return [Series] def /(other) - Utils.wrap_s(_s.div(other._s)) + _arithmetic(other, :div) end + # Returns the modulo. + # + # @return [Series] + def %(other) + if is_datelike + raise ArgumentError, "first cast to integer before applying modulo on datelike dtypes" + end + _arithmetic(other, :rem) + end + # Raises to the power of exponent. # # @return [Series] def **(power) if is_datelike raise ArgumentError, "first cast to integer before raising datelike dtypes to a power" end to_frame.select(Polars.col(name).pow(power)).to_series end - # def -@(other) - # end + # Performs negation. + # + # @return [Series] + def -@ + 0 - self + end # Returns elements of the Series. # # @return [Object] def [](item) - _s.get_idx(item) + if item.is_a?(Integer) + return _s.get_idx(item) + end + + if item.is_a?(Range) + return Slice.new(self).apply(item) + end + + raise ArgumentError, "Cannot get item of type: #{item.class.name}" end - # def []=(key, value) - # end + # Sets an element of the Series. + # + # @return [Object] + def []=(key, value) + if value.is_a?(Array) + if is_numeric || is_datelike + set_at_idx(key, value) + return + end + raise ArgumentError, "cannot set Series of dtype: #{dtype} with list/tuple as value; use a scalar value" + end + if key.is_a?(Series) + if key.dtype == :bool + self._s = set(key, value)._s + elsif key.dtype == :u64 + self._s = set_at_idx(key.cast(:u32), value)._s + elsif key.dtype == :u32 + self._s = set_at_idx(key, value)._s + else + raise Todo + end + end + + if key.is_a?(Array) + s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: :u32)) + self[s] = value + elsif key.is_a?(Integer) + # TODO fix + # self[[key]] = value + set_at_idx(key, value) + else + raise ArgumentError, "cannot use #{key} for indexing" + end + end + # Return an estimation of the total (heap) allocated size of the Series. 
# # Estimated size is given in the specified unit (bytes by default). # # This estimation is the sum of the size of its buffers, validity, including @@ -266,35 +358,144 @@ # @return [Boolean] def all to_frame.select(Polars.col(name).all).to_series[0] end - # def log - # end + # Compute the logarithm to a given base. + # + # @param base [Float] + # Given base, defaults to `Math::E`. + # + # @return [Series] + def log(base = Math::E) + super + end - # def log10 - # end + # Compute the base 10 logarithm of the input array, element-wise. + # + # @return [Series] + def log10 + super + end - # def exp - # end + # Compute the exponential, element-wise. + # + # @return [Series] + def exp + super + end - # def drop_nulls - # end + # Create a new Series that copies data from this Series without null values. + # + # @return [Series] + def drop_nulls + super + end - # def drop_nans - # end + # Drop NaN values. + # + # @return [Series] + def drop_nans + super + end # Cast this Series to a DataFrame. # # @return [DataFrame] def to_frame Utils.wrap_df(RbDataFrame.new([_s])) end - # def describe - # end + # Quick summary statistics of a series. + # + # Series with mixed datatypes will return summary statistics for the datatype of + # the first value. 
+ # + # @return [DataFrame] + # + # @example + # series_num = Polars::Series.new([1, 2, 3, 4, 5]) + # series_num.describe + # # => + # # shape: (6, 2) + # # ┌────────────┬──────────┐ + # # │ statistic ┆ value │ + # # │ --- ┆ --- │ + # # │ str ┆ f64 │ + # # ╞════════════╪══════════╡ + # # │ min ┆ 1.0 │ + # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤ + # # │ max ┆ 5.0 │ + # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤ + # # │ null_count ┆ 0.0 │ + # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤ + # # │ mean ┆ 3.0 │ + # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤ + # # │ std ┆ 1.581139 │ + # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤ + # # │ count ┆ 5.0 │ + # # └────────────┴──────────┘ + # + # @example + # series_str = Polars::Series.new(["a", "a", nil, "b", "c"]) + # series_str.describe + # # => + # # shape: (3, 2) + # # ┌────────────┬───────┐ + # # │ statistic ┆ value │ + # # │ --- ┆ --- │ + # # │ str ┆ i64 │ + # # ╞════════════╪═══════╡ + # # │ unique ┆ 4 │ + # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤ + # # │ null_count ┆ 1 │ + # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤ + # # │ count ┆ 5 │ + # # └────────────┴───────┘ + def describe + if len == 0 + raise ArgumentError, "Series must contain at least one value" + elsif is_numeric + s = cast(:f64) + stats = { + "min" => s.min, + "max" => s.max, + "null_count" => s.null_count, + "mean" => s.mean, + "std" => s.std, + "count" => s.len + } + elsif is_boolean + stats = { + "sum" => sum, + "null_count" => null_count, + "count" => len + } + elsif is_utf8 + stats = { + "unique" => unique.length, + "null_count" => null_count, + "count" => len + } + elsif is_datelike + # we coerce all to string, because a polars column + # only has a single dtype and dates: datetime and count: int don't match + stats = { + "min" => dt.min.to_s, + "max" => dt.max.to_s, + "null_count" => null_count.to_s, + "count" => len.to_s + } + else + raise TypeError, "This type is not supported" + end + Polars::DataFrame.new( + {"statistic" => stats.keys, "value" => stats.values} + ) + end + # Reduce this Series to the sum value. 
# # @return [Numeric] # # @note @@ -350,15 +551,23 @@ # # => 3 def max _s.max end - # def nan_max - # end + # Get maximum value, but propagate/poison encountered NaN values. + # + # @return [Object] + def nan_max + to_frame.select(Polars.col(name).nan_max)[0, 0] + end - # def nan_min - # end + # Get minimum value, but propagate/poison encountered NaN values. + # + # @return [Object] + def nan_min + to_frame.select(Polars.col(name).nan_min)[0, 0] + end # Get the standard deviation of this Series. # # @param ddof [Integer] # “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, @@ -476,15 +685,52 @@ # # └─────┴────────┘ def value_counts(sort: false) Utils.wrap_df(_s.value_counts(sort)) end - # def unique_counts - # end + # Return a count of the unique values in the order of appearance. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("id", ["a", "b", "b", "c", "c", "c"]) + # s.unique_counts + # # => + # # shape: (3,) + # # Series: 'id' [u32] + # # [ + # # 1 + # # 2 + # # 3 + # # ] + def unique_counts + super + end - # def entropy - # end + # Computes the entropy. + # + # Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities. + # + # @param base [Float] + # Given base, defaults to `e` + # @param normalize [Boolean] + # Normalize pk if it doesn't sum to 1. + # + # @return [Float, nil] + # + # @example + # a = Polars::Series.new([0.99, 0.005, 0.005]) + # a.entropy(normalize: true) + # # => 0.06293300616044681 + # + # @example + # b = Polars::Series.new([0.65, 0.10, 0.25]) + # b.entropy(normalize: true) + # # => 0.8568409950394724 + def entropy(base: Math::E, normalize: false) + Polars.select(Polars.lit(self).entropy(base: base, normalize: normalize)).to_series[0] + end # def cumulative_eval # end # Return a copy of the Series with a new alias/name.
@@ -583,11 +829,11 @@ # # 1 # # 3 # # 6 # # ] def cumsum(reverse: false) - Utils.wrap_s(_s.cumsum(reverse)) + super end # Get an array with the cumulative min computed at every element. # # @param reverse [Boolean] @@ -605,11 +851,11 @@ # # 3 # # 3 # # 1 # # ] def cummin(reverse: false) - Utils.wrap_s(_s.cummin(reverse)) + super end # Get an array with the cumulative max computed at every element. # # @param reverse [Boolean] @@ -627,11 +873,11 @@ # # 3 # # 5 # # 5 # # ] def cummax(reverse: false) - Utils.wrap_s(_s.cummax(reverse)) + super end # Get an array with the cumulative product computed at every element. # # @param reverse [Boolean] @@ -653,11 +899,11 @@ # # 1 # # 2 # # 6 # # ] def cumprod(reverse: false) - Utils.wrap_s(_s.cumprod(reverse)) + super end # Get the first `n` rows. # # Alias for {#head}. @@ -700,12 +946,11 @@ # # [ # # 2 # # 3 # # ] def slice(offset, length = nil) - length = len if length.nil? - Utils.wrap_s(_s.slice(offset, length)) + super end # Append a Series to this one. # # @param other [Series] @@ -833,12 +1078,27 @@ # # ] def tail(n = 10) to_frame.select(Utils.col(name).tail(n)).to_series end - # def take_every - # end + # Take every nth value in the Series and return as new Series. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 3, 4]) + # s.take_every(2) + # # => + # # shape: (2,) + # # Series: 'a' [i64] + # # [ + # # 1 + # # 3 + # # ] + def take_every(n) + super + end # Sort this Series. # # @param reverse [Boolean] # Reverse sort. @@ -876,21 +1136,82 @@ else Utils.wrap_s(_s.sort(reverse)) end end - # def top_k - # end + # Return the `k` largest elements. + # + # If `reverse: true`, the smallest elements will be given. + # + # @param k [Integer] + # Number of elements to return. + # @param reverse [Boolean] + # Return the smallest elements. + # + # @return [Series] + def top_k(k: 5, reverse: false) + super + end - # def arg_sort - # end + # Get the index values that would sort this Series.
+ # + # @param reverse [Boolean] + # Sort in reverse (descending) order. + # @param nulls_last [Boolean] + # Place null values last instead of first. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [5, 3, 4, 1, 2]) + # s.arg_sort + # # => + # # shape: (5,) + # # Series: 'a' [u32] + # # [ + # # 3 + # # 4 + # # 1 + # # 2 + # # 0 + # # ] + def arg_sort(reverse: false, nulls_last: false) + super + end - # def argsort - # end + # Get the index values that would sort this Series. + # + # Alias for {#arg_sort}. + # + # @param reverse [Boolean] + # Sort in reverse (descending) order. + # @param nulls_last [Boolean] + # Place null values last instead of first. + # + # @return [Series] + def argsort(reverse: false, nulls_last: false) + super + end - # def arg_unique - # end + # Get unique index as Series. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 2, 3]) + # s.arg_unique + # # => + # # shape: (3,) + # # Series: 'a' [u32] + # # [ + # # 0 + # # 1 + # # 3 + # # ] + def arg_unique + super + end # Get the index of the minimal value. # # @return [Integer, nil] # @@ -912,27 +1233,71 @@ # # => 0 def arg_max _s.arg_max end - # def search_sorted - # end + # Find indices where elements should be inserted to maintain order. + # + # @param element [Object] + # Expression or scalar value. + # + # @return [Integer] + def search_sorted(element) + Polars.select(Polars.lit(self).search_sorted(element))[0, 0] + end - # def unique - # end + # Get unique elements in series. + # + # @param maintain_order [Boolean] + # Maintain order of data. This requires more work. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 2, 3]) + # s.unique.sort + # # => + # # shape: (3,) + # # Series: 'a' [i64] + # # [ + # # 1 + # # 2 + # # 3 + # # ] + def unique(maintain_order: false) + super + end - # def take - # end + # Take values by index. + # + # @param indices [Array] + # Index location used for selection. 
+ # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 3, 4]) + # s.take([1, 3]) + # # => + # # shape: (2,) + # # Series: 'a' [i64] + # # [ + # # 2 + # # 4 + # # ] + def take(indices) + to_frame.select(Polars.col(name).take(indices)).to_series + end # Count the null values in this Series. # # @return [Integer] def null_count _s.null_count end - # Return True if the Series has a validity bitmask. + # Return `true` if the Series has a validity bitmask. # # If there is none, it means that there are no null values. # Use this to swiftly assert a Series does not have null values. # # @return [Boolean] @@ -951,45 +1316,218 @@ def is_empty len == 0 end alias_method :empty?, :is_empty - # def is_null - # end + # Returns a boolean Series indicating which values are null. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil]) + # s.is_null + # # => + # # shape: (4,) + # # Series: 'a' [bool] + # # [ + # # false + # # false + # # false + # # true + # # ] + def is_null + super + end - # def is_not_null - # end + # Returns a boolean Series indicating which values are not null. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil]) + # s.is_not_null + # # => + # # shape: (4,) + # # Series: 'a' [bool] + # # [ + # # true + # # true + # # true + # # false + # # ] + def is_not_null + super + end - # def is_finite - # end + # Returns a boolean Series indicating which values are finite. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY]) + # s.is_finite + # # => + # # shape: (3,) + # # Series: 'a' [bool] + # # [ + # # true + # # true + # # false + # # ] + def is_finite + super + end - # def is_infinite - # end + # Returns a boolean Series indicating which values are infinite. 
+ # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY]) + # s.is_infinite + # # => + # # shape: (3,) + # # Series: 'a' [bool] + # # [ + # # false + # # false + # # true + # # ] + def is_infinite + super + end - # def is_nan - # end + # Returns a boolean Series indicating which values are NaN. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN]) + # s.is_nan + # # => + # # shape: (4,) + # # Series: 'a' [bool] + # # [ + # # false + # # false + # # false + # # true + # # ] + def is_nan + super + end - # def is_not_nan - # end + # Returns a boolean Series indicating which values are not NaN. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN]) + # s.is_not_nan + # # => + # # shape: (4,) + # # Series: 'a' [bool] + # # [ + # # true + # # true + # # true + # # false + # # ] + def is_not_nan + super + end # def is_in # end - # def arg_true - # end + # Get index values where Boolean Series evaluate `true`. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 3]) + # (s == 2).arg_true + # # => + # # shape: (1,) + # # Series: 'a' [u32] + # # [ + # # 1 + # # ] + def arg_true + Polars.arg_where(self, eager: true) + end - # def is_unique - # end + # Get mask of all unique values. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 2, 3]) + # s.is_unique + # # => + # # shape: (4,) + # # Series: 'a' [bool] + # # [ + # # true + # # false + # # false + # # true + # # ] + def is_unique + super + end - # def is_first - # end + # Get a mask of the first unique value. + # + # @return [Series] + def is_first + super + end - # def is_duplicated - # end + # Get mask of all duplicated values. 
+ # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 2, 3]) + # s.is_duplicated + # # => + # # shape: (4,) + # # Series: 'a' [bool] + # # [ + # # false + # # true + # # true + # # false + # # ] + def is_duplicated + super + end - # def explode - # end + # Explode a list or utf8 Series. + # + # This means that every item is expanded to a new row. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [[1, 2], [3, 4], [9, 10]]) + # s.explode + # # => + # # shape: (6,) + # # Series: 'a' [i64] + # # [ + # # 1 + # # 2 + # # 3 + # # 4 + # # 9 + # # 10 + # # ] + def explode + super + end # Check if series is equal with another Series. # # @param other [Series] # Series to compare with. @@ -1023,12 +1561,33 @@ def len _s.len end alias_method :length, :len - # def cast - # end + # Cast between data types. + # + # @param dtype [Symbol] + # DataType to cast to + # @param strict [Boolean] + # Throw an error if a cast could not be done for instance due to an overflow + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [true, false, true]) + # s.cast(:u32) + # # => + # # shape: (3,) + # # Series: 'a' [u32] + # # [ + # # 1 + # # 0 + # # 1 + # # ] + def cast(dtype, strict: true) + super + end # def to_physical # end # Convert this Series to a Ruby Array. This operation clones data. @@ -1052,12 +1611,28 @@ def rechunk(in_place: false) opt_s = _s.rechunk(in_place) in_place ? self : Utils.wrap_s(opt_s) end - # def reverse - # end + # Return Series in reverse order. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 3], dtype: :i8) + # s.reverse + # # => + # # shape: (3,) + # # Series: 'a' [i8] + # # [ + # # 3 + # # 2 + # # 1 + # # ] + def reverse + super + end # Check if this Series datatype is numeric. # # @return [Boolean] # @@ -1130,23 +1705,147 @@ # end # def set # end - # def set_at_idx - # end + # Set values at the index locations. 
+ # + # @param idx [Object] + # Integers representing the index locations. + # @param value [Object] + # Replacement values. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 3]) + # s.set_at_idx(1, 10) + # # => + # # shape: (3,) + # # Series: 'a' [i64] + # # [ + # # 1 + # # 10 + # # 3 + # # ] + def set_at_idx(idx, value) + if idx.is_a?(Integer) + idx = [idx] + end + if idx.length == 0 + return self + end - # def cleared - # end + idx = Series.new("", idx) + if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(String) || value.nil? + value = Series.new("", [value]) + # if we need to set more than a single value, we extend it + if idx.length > 0 + value = value.extend_constant(value[0], idx.length - 1) + end + elsif !value.is_a?(Series) + value = Series.new("", value) + end + _s.set_at_idx(idx._s, value._s) + self + end + + # Create an empty copy of the current Series. + # + # The copy has identical name/dtype but no data. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [nil, true, false]) + # s.cleared + # # => + # # shape: (0,) + # # Series: 'a' [bool] + # # [ + # # ] + def cleared + len > 0 ? limit(0) : clone + end + # clone handled by initialize_copy - # def fill_nan - # end + # Fill floating point NaN value with a fill value. + # + # @param fill_value [Object] + # Value used to fill nan values. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN]) + # s.fill_nan(0) + # # => + # # shape: (4,) + # # Series: 'a' [f64] + # # [ + # # 1.0 + # # 2.0 + # # 3.0 + # # 0.0 + # # ] + def fill_nan(fill_value) + super + end - # def fill_null - # end + # Fill null values using the specified value or strategy. + # + # @param value [Object] + # Value used to fill null values. + # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"] + # Strategy used to fill null values. 
+ # @param limit + # Number of consecutive null values to fill when using the "forward" or + # "backward" strategy. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 3, nil]) + # s.fill_null(strategy: "forward") + # # => + # # shape: (4,) + # # Series: 'a' [i64] + # # [ + # # 1 + # # 2 + # # 3 + # # 3 + # # ] + # + # @example + # s.fill_null(strategy: "min") + # # => + # # shape: (4,) + # # Series: 'a' [i64] + # # [ + # # 1 + # # 2 + # # 3 + # # 1 + # # ] + # + # @example + # s = Polars::Series.new("b", ["x", nil, "z"]) + # s.fill_null(Polars.lit("")) + # # => + # # shape: (3,) + # # Series: 'b' [str] + # # [ + # # "x" + # # "" + # # "z" + # # ] + def fill_null(value = nil, strategy: nil, limit: nil) + super + end # Rounds down to the nearest integer value. # # Only works on floating point Series. # @@ -1183,11 +1882,11 @@ # # 2.0 # # 3.0 # # 4.0 # # ] def ceil - Utils.wrap_s(_s.ceil) + super end # Round underlying floating point data by `decimals` digits. # # @param decimals [Integer] @@ -1205,69 +1904,380 @@ # # 1.12 # # 2.57 # # 3.9 # # ] def round(decimals = 0) - Utils.wrap_s(_s.round(decimals)) + super end # def dot # end - # def mode - # end + # Compute the most occurring value(s). + # + # Can return multiple Values. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 2, 3]) + # s.mode + # # => + # # shape: (1,) + # # Series: 'a' [i64] + # # [ + # # 2 + # # ] + def mode + super + end - # def sign - # end + # Compute the element-wise indication of the sign. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [-9.0, -0.0, 0.0, 4.0, nil]) + # s.sign + # # => + # # shape: (5,) + # # Series: 'a' [i64] + # # [ + # # -1 + # # 0 + # # 0 + # # 1 + # # null + # # ] + def sign + super + end - # def sin - # end + # Compute the element-wise value for the sine. 
+ # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI]) + # s.sin + # # => + # # shape: (3,) + # # Series: 'a' [f64] + # # [ + # # 0.0 + # # 1.0 + # # 1.2246e-16 + # # ] + def sin + super + end - # def cos - # end + # Compute the element-wise value for the cosine. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI]) + # s.cos + # # => + # # shape: (3,) + # # Series: 'a' [f64] + # # [ + # # 1.0 + # # 6.1232e-17 + # # -1.0 + # # ] + def cos + super + end - # def tan - # end + # Compute the element-wise value for the tangent. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI]) + # s.tan + # # => + # # shape: (3,) + # # Series: 'a' [f64] + # # [ + # # 0.0 + # # 1.6331e16 + # # -1.2246e-16 + # # ] + def tan + super + end - # def arcsin - # end + # Compute the element-wise value for the inverse sine. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1.0, 0.0, -1.0]) + # s.arcsin + # # => + # # shape: (3,) + # # Series: 'a' [f64] + # # [ + # # 1.570796 + # # 0.0 + # # -1.570796 + # # ] + def arcsin + super + end - # def arccos - # end + # Compute the element-wise value for the inverse cosine. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1.0, 0.0, -1.0]) + # s.arccos + # # => + # # shape: (3,) + # # Series: 'a' [f64] + # # [ + # # 0.0 + # # 1.570796 + # # 3.141593 + # # ] + def arccos + super + end - # def arctan - # end + # Compute the element-wise value for the inverse tangent. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1.0, 0.0, -1.0]) + # s.arctan + # # => + # # shape: (3,) + # # Series: 'a' [f64] + # # [ + # # 0.785398 + # # 0.0 + # # -0.785398 + # # ] + def arctan + super + end - # def arcsinh - # end + # Compute the element-wise value for the inverse hyperbolic sine. 
+ # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1.0, 0.0, -1.0]) + # s.arcsinh + # # => + # # shape: (3,) + # # Series: 'a' [f64] + # # [ + # # 0.881374 + # # 0.0 + # # -0.881374 + # # ] + def arcsinh + super + end - # def arccosh - # end + # Compute the element-wise value for the inverse hyperbolic cosine. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [5.0, 1.0, 0.0, -1.0]) + # s.arccosh + # # => + # # shape: (4,) + # # Series: 'a' [f64] + # # [ + # # 2.292432 + # # 0.0 + # # NaN + # # NaN + # # ] + def arccosh + super + end - # def arctanh - # end + # Compute the element-wise value for the inverse hyperbolic tangent. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [2.0, 1.0, 0.5, 0.0, -0.5, -1.0, -1.1]) + # s.arctanh + # # => + # # shape: (7,) + # # Series: 'a' [f64] + # # [ + # # NaN + # # inf + # # 0.549306 + # # 0.0 + # # -0.549306 + # # -inf + # # NaN + # # ] + def arctanh + super + end - # def sinh - # end + # Compute the element-wise value for the hyperbolic sine. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1.0, 0.0, -1.0]) + # s.sinh + # # => + # # shape: (3,) + # # Series: 'a' [f64] + # # [ + # # 1.175201 + # # 0.0 + # # -1.175201 + # # ] + def sinh + super + end - # def cosh - # end + # Compute the element-wise value for the hyperbolic cosine. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1.0, 0.0, -1.0]) + # s.cosh + # # => + # # shape: (3,) + # # Series: 'a' [f64] + # # [ + # # 1.543081 + # # 1.0 + # # 1.543081 + # # ] + def cosh + super + end - # def tanh - # end + # Compute the element-wise value for the hyperbolic tangent. 
+ # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1.0, 0.0, -1.0]) + # s.tanh + # # => + # # shape: (3,) + # # Series: 'a' [f64] + # # [ + # # 0.761594 + # # 0.0 + # # -0.761594 + # # ] + def tanh + super + end # def apply # end - # def shift - # end + # Shift the values by a given period. + # + # @param periods [Integer] + # Number of places to shift (may be negative). + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 3]) + # s.shift(1) + # # => + # # shape: (3,) + # # Series: 'a' [i64] + # # [ + # # null + # # 1 + # # 2 + # # ] + # + # @example + # s.shift(-1) + # # => + # # shape: (3,) + # # Series: 'a' [i64] + # # [ + # # 2 + # # 3 + # # null + # # ] + def shift(periods = 1) + super + end - # def shift_and_fill - # end + # Shift the values by a given period and fill the resulting null values. + # + # @param periods [Integer] + # Number of places to shift (may be negative). + # @param fill_value [Object] + # Fill None values with the result of this expression. + # + # @return [Series] + def shift_and_fill(periods, fill_value) + super + end - # def zip_with - # end + # Take values from self or other based on the given mask. + # + # Where mask evaluates true, take values from self. Where mask evaluates false, + # take values from other. + # + # @param mask [Series] + # Boolean Series. + # @param other [Series] + # Series of same type. 
+ # + # @return [Series] + # + # @example + # s1 = Polars::Series.new([1, 2, 3, 4, 5]) + # s2 = Polars::Series.new([5, 4, 3, 2, 1]) + # s1.zip_with(s1 < s2, s2) + # # => + # # shape: (5,) + # # Series: '' [i64] + # # [ + # # 1 + # # 2 + # # 3 + # # 2 + # # 1 + # # ] + # + # @example + # mask = Polars::Series.new([true, false, true, false, true]) + # s1.zip_with(mask, s2) + # # => + # # shape: (5,) + # # Series: '' [i64] + # # [ + # # 1 + # # 4 + # # 3 + # # 2 + # # 5 + # # ] + def zip_with(mask, other) + Utils.wrap_s(_s.zip_with(mask._s, other._s)) + end # def rolling_min # end # def rolling_max @@ -1352,66 +2362,252 @@ # # => 3 def n_unique _s.n_unique end - # def shrink_to_fit - # end + # Shrink Series memory usage. + # + # Shrinks the underlying array capacity to exactly fit the actual data. + # (Note that this function does not change the Series data type). + # + # @return [Series] + def shrink_to_fit(in_place: false) + if in_place + _s.shrink_to_fit + self + else + series = clone + series._s.shrink_to_fit + series + end + end # def _hash # end - # def reinterpret - # end + # Reinterpret the underlying bits as a signed/unsigned integer. + # + # This operation is only allowed for 64bit integers. For lower bits integers, + # you can safely use that cast operation. + # + # @param signed [Boolean] + # If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`. + # + # @return [Series] + def reinterpret(signed: true) + super + end - # def interpolate - # end + # Interpolate intermediate values. The interpolation method is linear. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, nil, nil, 5]) + # s.interpolate + # # => + # # shape: (5,) + # # Series: 'a' [i64] + # # [ + # # 1 + # # 2 + # # 3 + # # 4 + # # 5 + # # ] + def interpolate + super + end - # def abs - # end + # Compute absolute values. + # + # @return [Series] + def abs + super + end # def rank # end - # def diff - # end + # Calculate the n-th discrete difference. 
+ # + # @param n [Integer] + # Number of slots to shift. + # @param null_behavior ["ignore", "drop"] + # How to handle null values. + # + # @return [Series] + def diff(n: 1, null_behavior: "ignore") + super + end # def pct_change # end - # def skew - # end + # Compute the sample skewness of a data set. + # + # For normally distributed data, the skewness should be about zero. For + # unimodal continuous distributions, a skewness value greater than zero means + # that there is more weight in the right tail of the distribution. The + # function `skewtest` can be used to determine if the skewness value + # is close enough to zero, statistically speaking. + # + # @param bias [Boolean] + # If `false`, the calculations are corrected for statistical bias. + # + # @return [Float, nil] + def skew(bias: true) + _s.skew(bias) + end - # def kurtosis - # end + # Compute the kurtosis (Fisher or Pearson) of a dataset. + # + # Kurtosis is the fourth central moment divided by the square of the + # variance. If Fisher's definition is used, then 3.0 is subtracted from + # the result to give 0.0 for a normal distribution. + # If bias is false, then the kurtosis is calculated using k statistics to + # eliminate bias coming from biased moment estimators + # + # @param fisher [Boolean] + # If `true`, Fisher's definition is used (normal ==> 0.0). If `false`, + # Pearson's definition is used (normal ==> 3.0). + # @param bias [Boolean] + # If `false`, the calculations are corrected for statistical bias. + # + # @return [Float, nil] + def kurtosis(fisher: true, bias: true) + _s.kurtosis(fisher, bias) + end - # def clip - # end + # Clip (limit) the values in an array to a `min` and `max` boundary. + # + # Only works for numerical types. + # + # If you want to clip other dtypes, consider writing a "when, then, otherwise" + # expression. See {#when} for more information. + # + # @param min_val [Numeric] + # Minimum value. + # @param max_val [Numeric] + # Maximum value. 
+ # + # @return [Series] + # + # @example + # s = Polars::Series.new("foo", [-50, 5, nil, 50]) + # s.clip(1, 10) + # # => + # # shape: (4,) + # # Series: 'foo' [i64] + # # [ + # # 1 + # # 5 + # # null + # # 10 + # # ] + def clip(min_val, max_val) + super + end - # def clip_min - # end + # Clip (limit) the values in an array to a `min` boundary. + # + # Only works for numerical types. + # + # If you want to clip other dtypes, consider writing a "when, then, otherwise" + # expression. See {#when} for more information. + # + # @param min_val [Numeric] + # Minimum value. + # + # @return [Series] + def clip_min(min_val) + super + end - # def clip_max - # end + # Clip (limit) the values in an array to a `max` boundary. + # + # Only works for numerical types. + # + # If you want to clip other dtypes, consider writing a "when, then, otherwise" + # expression. See {#when} for more information. + # + # @param max_val [Numeric] + # Maximum value. + # + # @return [Series] + def clip_max(max_val) + super + end - # def reshape - # end + # Reshape this Series to a flat Series or a Series of Lists. + # + # @param dims [Array] + # Tuple of the dimension sizes. If a -1 is used in any of the dimensions, that + # dimension is inferred. + # + # @return [Series] + def reshape(dims) + super + end - # def shuffle - # end + # Shuffle the contents of this Series. + # + # @param seed [Integer, nil] + # Seed for the random number generator. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 3]) + # s.shuffle(seed: 1) + # # => + # # shape: (3,) + # # Series: 'a' [i64] + # # [ + # # 2 + # # 1 + # # 3 + # # ] + def shuffle(seed: nil) + super + end # def ewm_mean # end # def ewm_std # end # def ewm_var # end - # def extend_constant - # end + # Extend the Series with given number of values. + # + # @param value [Object] + # The value to extend the Series with. This value may be `nil` to fill with + # nulls. + # @param n [Integer] + # The number of values to extend. 
+ # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 3]) + # s.extend_constant(99, 2) + # # => + # # shape: (5,) + # # Series: 'a' [i64] + # # [ + # # 1 + # # 2 + # # 3 + # # 99 + # # 99 + # # ] + def extend_constant(value, n) + super + end # Flags the Series as sorted. # # Enables downstream code to use fast paths for sorted arrays. # @@ -1430,15 +2626,26 @@ # # => 3 def set_sorted(reverse: false) Utils.wrap_s(_s.set_sorted(reverse)) end - # def new_from_index - # end + # Create a new Series filled with values from the given index. + # + # @return [Series] + def new_from_index(index, length) + Utils.wrap_s(_s.new_from_index(index, length)) + end - # def shrink_dtype - # end + # Shrink numeric columns to the minimal required datatype. + # + # Shrink to the dtype needed to fit the extrema of this Series. + # This can be used to reduce memory pressure. + # + # @return [Series] + def shrink_dtype + super + end # def arr # end # def cat @@ -1458,31 +2665,69 @@ def initialize_copy(other) super self._s = _s._clone end + def coerce(other) + if other.is_a?(Numeric) + # TODO: improve; the literal is turned into a Series via a temporary frame + series = to_frame.select(Polars.lit(other)).to_series + [series, self] + else + raise TypeError, "#{self.class} can't be coerced into #{other.class}" + end + end + + def _comp(other, op) + if other.is_a?(Series) + return Utils.wrap_s(_s.send(op, other._s)) + end + + if dtype == :str + raise Todo + end + Utils.wrap_s(_s.send("#{op}_#{dtype}", other)) + end + + def _arithmetic(other, op) + if other.is_a?(Expr) + other = to_frame.select(other).to_series + end + if other.is_a?(Series) + return Utils.wrap_s(_s.send(op, other._s)) + end + + raise Todo + end + + def series_to_rbseries(name, values) + # NOTE(review): this renames the passed-in Series itself; should not be in-place? + values.rename(name, in_place: true) + values._s + end + def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil) ruby_dtype = nil + nested_dtype = nil if (values.nil? || values.empty?) && dtype.nil? 
if dtype_if_empty # if dtype for empty sequence could be guessed # (e.g. comparisons between self and other) dtype = dtype_if_empty else # default to Float32 type - dtype = "f32" + dtype = :f32 end end rb_temporal_types = [] rb_temporal_types << Date if defined?(Date) rb_temporal_types << DateTime if defined?(DateTime) rb_temporal_types << Time if defined?(Time) - # _get_first_non_none - value = values.find { |v| !v.nil? } + value = _get_first_non_none(values) if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil? constructor = polars_type_to_constructor(dtype) rbseries = constructor.call(name, values, strict) return rbseries @@ -1502,10 +2747,25 @@ # dtype = rb_type_to_dtype(ruby_dtype) # elsif rb_temporal_types.include?(dtype) # dtype = rb_type_to_dtype(dtype) # end + if ruby_dtype == Date + RbSeries.new_opt_date(name, values, strict) + else + raise Todo + end + elsif ruby_dtype == Array + if nested_dtype.nil? + nested_value = _get_first_non_none(value) + nested_dtype = nested_value.nil? ? Float : nested_value.class + end + + if nested_dtype == Array + raise Todo + end + raise Todo else constructor = rb_type_to_constructor(value.class) constructor.call(name, values, strict) end @@ -1544,8 +2804,12 @@ def rb_type_to_constructor(dtype) RB_TYPE_TO_CONSTRUCTOR.fetch(dtype) rescue KeyError # RbSeries.method(:new_object) raise ArgumentError, "Cannot determine type" + end + + def _get_first_non_none(values) + values.find { |v| !v.nil? } end end end