lib/polars/series.rb in polars-df-0.1.2 vs lib/polars/series.rb in polars-df-0.1.3
- old
+ new
@@ -1,8 +1,10 @@
module Polars
# A Series represents a single column in a polars DataFrame.
class Series
+ include ExprDispatch
+
# @private
attr_accessor :_s
# Create a new Series.
#
@@ -46,10 +48,12 @@
name = "" if name.nil?
if values.nil?
self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
+ elsif values.is_a?(Series)
+ self._s = series_to_rbseries(name, values)
elsif values.is_a?(Range)
self._s =
Polars.arange(
values.first,
values.last + (values.exclude_end? ? 0 : 1),
@@ -128,96 +132,184 @@
# Bitwise AND.
#
# @return [Series]
def &(other)
+ if !other.is_a?(Series)
+ other = Series.new([other])
+ end
Utils.wrap_s(_s.bitand(other._s))
end
# Bitwise OR.
#
# @return [Series]
def |(other)
+ if !other.is_a?(Series)
+ other = Series.new([other])
+ end
Utils.wrap_s(_s.bitor(other._s))
end
# Bitwise XOR.
#
# @return [Series]
def ^(other)
+ if !other.is_a?(Series)
+ other = Series.new([other])
+ end
Utils.wrap_s(_s.bitxor(other._s))
end
- # def ==(other)
- # end
+ # Equal.
+ #
+ # @return [Series]
+ def ==(other)
+ _comp(other, :eq)
+ end
- # def !=(other)
- # end
+ # Not equal.
+ #
+ # @return [Series]
+ def !=(other)
+ _comp(other, :neq)
+ end
- # def >(other)
- # end
+ # Greater than.
+ #
+ # @return [Series]
+ def >(other)
+ _comp(other, :gt)
+ end
- # def <(other)
- # end
+ # Less than.
+ #
+ # @return [Series]
+ def <(other)
+ _comp(other, :lt)
+ end
- # def >=(other)
- # end
+ # Greater than or equal.
+ #
+ # @return [Series]
+ def >=(other)
+ _comp(other, :gt_eq)
+ end
- # def <=(other)
- # end
+ # Less than or equal.
+ #
+ # @return [Series]
+ def <=(other)
+ _comp(other, :lt_eq)
+ end
# Performs addition.
#
# @return [Series]
def +(other)
- Utils. wrap_s(_s.add(other._s))
+ _arithmetic(other, :add)
end
# Performs subtraction.
#
# @return [Series]
def -(other)
- Utils.wrap_s(_s.sub(other._s))
+ _arithmetic(other, :sub)
end
# Performs multiplication.
#
# @return [Series]
def *(other)
- Utils.wrap_s(_s.mul(other._s))
+ _arithmetic(other, :mul)
end
# Performs division.
#
# @return [Series]
def /(other)
- Utils.wrap_s(_s.div(other._s))
+ _arithmetic(other, :div)
end
+ # Returns the modulo.
+ #
+ # @return [Series]
+ def %(other)
+ if is_datelike
+ raise ArgumentError, "first cast to integer before applying modulo on datelike dtypes"
+ end
+ _arithmetic(other, :rem)
+ end
+
# Raises to the power of exponent.
#
# @return [Series]
def **(power)
if is_datelike
raise ArgumentError, "first cast to integer before raising datelike dtypes to a power"
end
to_frame.select(Polars.col(name).pow(power)).to_series
end
- # def -@(other)
- # end
+ # Performs negation.
+ #
+ # @return [Series]
+ def -@
+ 0 - self
+ end
# Returns elements of the Series.
#
# @return [Object]
def [](item)
- _s.get_idx(item)
+ if item.is_a?(Integer)
+ return _s.get_idx(item)
+ end
+
+ if item.is_a?(Range)
+ return Slice.new(self).apply(item)
+ end
+
+ raise ArgumentError, "Cannot get item of type: #{item.class.name}"
end
- # def []=(key, value)
- # end
+ # Sets an element of the Series.
+ #
+ # @return [Object]
+ def []=(key, value)
+ if value.is_a?(Array)
+ if is_numeric || is_datelike
+ set_at_idx(key, value)
+ return
+ end
+ raise ArgumentError, "cannot set Series of dtype: #{dtype} with list/tuple as value; use a scalar value"
+ end
+ if key.is_a?(Series)
+ if key.dtype == :bool
+ self._s = set(key, value)._s
+ elsif key.dtype == :u64
+ self._s = set_at_idx(key.cast(:u32), value)._s
+ elsif key.dtype == :u32
+ self._s = set_at_idx(key, value)._s
+ else
+ raise Todo
+ end
+ end
+
+ if key.is_a?(Array)
+ s = Utils.wrap_s(sequence_to_rbseries("", key, dtype: :u32))
+ self[s] = value
+ elsif key.is_a?(Integer)
+ # TODO fix
+ # self[[key]] = value
+ set_at_idx(key, value)
+ else
+ raise ArgumentError, "cannot use #{key} for indexing"
+ end
+ end
+
# Return an estimation of the total (heap) allocated size of the Series.
#
# Estimated size is given in the specified unit (bytes by default).
#
# This estimation is the sum of the size of its buffers, validity, including
@@ -266,35 +358,144 @@
# @return [Boolean]
def all
to_frame.select(Polars.col(name).all).to_series[0]
end
- # def log
- # end
+ # Compute the logarithm to a given base.
+ #
+ # @param base [Float]
+ # Given base, defaults to `Math::E`.
+ #
+ # @return [Series]
+ def log(base = Math::E)
+ super
+ end
- # def log10
- # end
+ # Compute the base 10 logarithm of the input array, element-wise.
+ #
+ # @return [Series]
+ def log10
+ super
+ end
- # def exp
- # end
+ # Compute the exponential, element-wise.
+ #
+ # @return [Series]
+ def exp
+ super
+ end
- # def drop_nulls
- # end
+ # Create a new Series that copies data from this Series without null values.
+ #
+ # @return [Series]
+ def drop_nulls
+ super
+ end
- # def drop_nans
- # end
+ # Drop NaN values.
+ #
+ # @return [Series]
+ def drop_nans
+ super
+ end
# Cast this Series to a DataFrame.
#
# @return [DataFrame]
def to_frame
Utils.wrap_df(RbDataFrame.new([_s]))
end
- # def describe
- # end
+ # Quick summary statistics of a series.
+ #
+ # Series with mixed datatypes will return summary statistics for the datatype of
+ # the first value.
+ #
+ # @return [DataFrame]
+ #
+ # @example
+ # series_num = Polars::Series.new([1, 2, 3, 4, 5])
+ # series_num.describe
+ # # =>
+ # # shape: (6, 2)
+ # # ┌────────────┬──────────┐
+ # # │ statistic ┆ value │
+ # # │ --- ┆ --- │
+ # # │ str ┆ f64 │
+ # # ╞════════════╪══════════╡
+ # # │ min ┆ 1.0 │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ max ┆ 5.0 │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ null_count ┆ 0.0 │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ mean ┆ 3.0 │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ std ┆ 1.581139 │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌┤
+ # # │ count ┆ 5.0 │
+ # # └────────────┴──────────┘
+ #
+ # @example
+ # series_str = Polars::Series.new(["a", "a", nil, "b", "c"])
+ # series_str.describe
+ # # =>
+ # # shape: (3, 2)
+ # # ┌────────────┬───────┐
+ # # │ statistic ┆ value │
+ # # │ --- ┆ --- │
+ # # │ str ┆ i64 │
+ # # ╞════════════╪═══════╡
+ # # │ unique ┆ 4 │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+ # # │ null_count ┆ 1 │
+ # # ├╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌┤
+ # # │ count ┆ 5 │
+ # # └────────────┴───────┘
+ def describe
+ if len == 0
+ raise ArgumentError, "Series must contain at least one value"
+ elsif is_numeric
+ s = cast(:f64)
+ stats = {
+ "min" => s.min,
+ "max" => s.max,
+ "null_count" => s.null_count,
+ "mean" => s.mean,
+ "std" => s.std,
+ "count" => s.len
+ }
+ elsif is_boolean
+ stats = {
+ "sum" => sum,
+ "null_count" => null_count,
+ "count" => len
+ }
+ elsif is_utf8
+ stats = {
+ "unique" => unique.length,
+ "null_count" => null_count,
+ "count" => len
+ }
+ elsif is_datelike
+ # we coerce all to string, because a polars column
+ # only has a single dtype and dates: datetime and count: int don't match
+ stats = {
+ "min" => dt.min.to_s,
+ "max" => dt.max.to_s,
+ "null_count" => null_count.to_s,
+ "count" => len.to_s
+ }
+ else
+ raise TypeError, "This type is not supported"
+ end
+ Polars::DataFrame.new(
+ {"statistic" => stats.keys, "value" => stats.values}
+ )
+ end
+
# Reduce this Series to the sum value.
#
# @return [Numeric]
#
# @note
@@ -350,15 +551,23 @@
# # => 3
def max
_s.max
end
- # def nan_max
- # end
+ # Get maximum value, but propagate/poison encountered NaN values.
+ #
+ # @return [Object]
+ def nan_max
+ to_frame.select(Polars.col(name).nan_max)[0, 0]
+ end
- # def nan_min
- # end
+ # Get minimum value, but propagate/poison encountered NaN values.
+ #
+ # @return [Object]
+ def nan_min
+ to_frame.select(Polars.col(name).nan_min)[0, 0]
+ end
# Get the standard deviation of this Series.
#
# @param ddof [Integer]
# “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof,
@@ -476,15 +685,52 @@
# # └─────┴────────┘
def value_counts(sort: false)
Utils.wrap_df(_s.value_counts(sort))
end
- # def unique_counts
- # end
+ # Return a count of the unique values in the order of appearance.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("id", ["a", "b", "b", "c", "c", "c"])
+ # s.unique_counts
+ # # =>
+ # # shape: (3,)
+ # # Series: 'id' [u32]
+ # # [
+ # # 1
+ # # 2
+ # # 3
+ # # ]
+ def unique_counts
+ super
+ end
- # def entropy
- # end
+ # Computes the entropy.
+ #
+ # Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities.
+ #
+ # @param base [Float]
+ # Given base, defaults to `e`
+ # @param normalize [Boolean]
+ # Normalize pk if it doesn't sum to 1.
+ #
+ # @return [Float, nil]
+ #
+ # @example
+ # a = Polars::Series.new([0.99, 0.005, 0.005])
+ # a.entropy(normalize: true)
+ # # => 0.06293300616044681
+ #
+ # @example
+ # b = Polars::Series.new([0.65, 0.10, 0.25])
+ # b.entropy(normalize: true)
+ # # => 0.8568409950394724
+ def entropy(base: Math::E, normalize: false)
+ Polars.select(Polars.lit(self).entropy(base: base, normalize: normalize)).to_series[0]
+ end
# def cumulative_eval
# end
# Return a copy of the Series with a new alias/name.
@@ -583,11 +829,11 @@
# # 1
# # 3
# # 6
# # ]
def cumsum(reverse: false)
- Utils.wrap_s(_s.cumsum(reverse))
+ super
end
# Get an array with the cumulative min computed at every element.
#
# @param reverse [Boolean]
@@ -605,11 +851,11 @@
# # 3
# # 3
# # 1
# # ]
def cummin(reverse: false)
- Utils.wrap_s(_s.cummin(reverse))
+ super
end
# Get an array with the cumulative max computed at every element.
#
# @param reverse [Boolean]
@@ -627,11 +873,11 @@
# # 3
# # 5
# # 5
# # ]
def cummax(reverse: false)
- Utils.wrap_s(_s.cummax(reverse))
+ super
end
# Get an array with the cumulative product computed at every element.
#
# @param reverse [Boolean]
@@ -653,11 +899,11 @@
# # 1
# # 2
# # 6
# # ]
def cumprod(reverse: false)
- Utils.wrap_s(_s.cumprod(reverse))
+ super
end
# Get the first `n` rows.
#
# Alias for {#head}.
@@ -700,12 +946,11 @@
# # [
# # 2
# # 3
# # ]
def slice(offset, length = nil)
- length = len if length.nil?
- Utils.wrap_s(_s.slice(offset, length))
+ super
end
# Append a Series to this one.
#
# @param other [Series]
@@ -833,12 +1078,27 @@
# # ]
def tail(n = 10)
to_frame.select(Utils.col(name).tail(n)).to_series
end
- # def take_every
- # end
+ # Take every nth value in the Series and return as new Series.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
+ # s.take_every(2)
+ # # =>
+ # # shape: (2,)
+ # # Series: 'a' [i64]
+ # # [
+ # # 1
+ # # 3
+ # # ]
+ def take_every(n)
+ super
+ end
# Sort this Series.
#
# @param reverse [Boolean]
# Reverse sort.
@@ -876,21 +1136,82 @@
else
Utils.wrap_s(_s.sort(reverse))
end
end
- # def top_k
- # end
+ # Return the `k` largest elements.
+ #
+ # If `reverse: true`, the smallest elements will be given.
+ #
+ # @param k [Integer]
+ # Number of elements to return.
+ # @param reverse [Boolean]
+ # Return the smallest elements.
+ #
+ # @return [Series]
+ def top_k(k: 5, reverse: false)
+ super
+ end
- # def arg_sort
- # end
+ # Get the index values that would sort this Series.
+ #
+ # @param reverse [Boolean]
+ # Sort in reverse (descending) order.
+ # @param nulls_last [Boolean]
+ # Place null values last instead of first.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [5, 3, 4, 1, 2])
+ # s.arg_sort
+ # # =>
+ # # shape: (5,)
+ # # Series: 'a' [u32]
+ # # [
+ # # 3
+ # # 4
+ # # 1
+ # # 2
+ # # 0
+ # # ]
+ def arg_sort(reverse: false, nulls_last: false)
+ super
+ end
- # def argsort
- # end
+ # Get the index values that would sort this Series.
+ #
+ # Alias for {#arg_sort}.
+ #
+ # @param reverse [Boolean]
+ # Sort in reverse (descending) order.
+ # @param nulls_last [Boolean]
+ # Place null values last instead of first.
+ #
+ # @return [Series]
+ def argsort(reverse: false, nulls_last: false)
+ super
+ end
- # def arg_unique
- # end
+ # Get unique index as Series.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
+ # s.arg_unique
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [u32]
+ # # [
+ # # 0
+ # # 1
+ # # 3
+ # # ]
+ def arg_unique
+ super
+ end
# Get the index of the minimal value.
#
# @return [Integer, nil]
#
@@ -912,27 +1233,71 @@
# # => 0
def arg_max
_s.arg_max
end
- # def search_sorted
- # end
+ # Find indices where elements should be inserted to maintain order.
+ #
+ # @param element [Object]
+ # Expression or scalar value.
+ #
+ # @return [Integer]
+ def search_sorted(element)
+ Polars.select(Polars.lit(self).search_sorted(element))[0, 0]
+ end
- # def unique
- # end
+ # Get unique elements in series.
+ #
+ # @param maintain_order [Boolean]
+ # Maintain order of data. This requires more work.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
+ # s.unique.sort
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [i64]
+ # # [
+ # # 1
+ # # 2
+ # # 3
+ # # ]
+ def unique(maintain_order: false)
+ super
+ end
- # def take
- # end
+ # Take values by index.
+ #
+ # @param indices [Array]
+ # Index location used for selection.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 3, 4])
+ # s.take([1, 3])
+ # # =>
+ # # shape: (2,)
+ # # Series: 'a' [i64]
+ # # [
+ # # 2
+ # # 4
+ # # ]
+ def take(indices)
+ to_frame.select(Polars.col(name).take(indices)).to_series
+ end
# Count the null values in this Series.
#
# @return [Integer]
def null_count
_s.null_count
end
- # Return True if the Series has a validity bitmask.
+ # Return `true` if the Series has a validity bitmask.
#
# If there is none, it means that there are no null values.
# Use this to swiftly assert a Series does not have null values.
#
# @return [Boolean]
@@ -951,45 +1316,218 @@
def is_empty
len == 0
end
alias_method :empty?, :is_empty
- # def is_null
- # end
+ # Returns a boolean Series indicating which values are null.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
+ # s.is_null
+ # # =>
+ # # shape: (4,)
+ # # Series: 'a' [bool]
+ # # [
+ # # false
+ # # false
+ # # false
+ # # true
+ # # ]
+ def is_null
+ super
+ end
- # def is_not_null
- # end
+ # Returns a boolean Series indicating which values are not null.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, nil])
+ # s.is_not_null
+ # # =>
+ # # shape: (4,)
+ # # Series: 'a' [bool]
+ # # [
+ # # true
+ # # true
+ # # true
+ # # false
+ # # ]
+ def is_not_null
+ super
+ end
- # def is_finite
- # end
+ # Returns a boolean Series indicating which values are finite.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
+ # s.is_finite
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [bool]
+ # # [
+ # # true
+ # # true
+ # # false
+ # # ]
+ def is_finite
+ super
+ end
- # def is_infinite
- # end
+ # Returns a boolean Series indicating which values are infinite.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1.0, 2.0, Float::INFINITY])
+ # s.is_infinite
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [bool]
+ # # [
+ # # false
+ # # false
+ # # true
+ # # ]
+ def is_infinite
+ super
+ end
- # def is_nan
- # end
+ # Returns a boolean Series indicating which values are NaN.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
+ # s.is_nan
+ # # =>
+ # # shape: (4,)
+ # # Series: 'a' [bool]
+ # # [
+ # # false
+ # # false
+ # # false
+ # # true
+ # # ]
+ def is_nan
+ super
+ end
- # def is_not_nan
- # end
+ # Returns a boolean Series indicating which values are not NaN.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
+ # s.is_not_nan
+ # # =>
+ # # shape: (4,)
+ # # Series: 'a' [bool]
+ # # [
+ # # true
+ # # true
+ # # true
+ # # false
+ # # ]
+ def is_not_nan
+ super
+ end
# def is_in
# end
- # def arg_true
- # end
+ # Get index values where Boolean Series evaluate `true`.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 3])
+ # (s == 2).arg_true
+ # # =>
+ # # shape: (1,)
+ # # Series: 'a' [u32]
+ # # [
+ # # 1
+ # # ]
+ def arg_true
+ Polars.arg_where(self, eager: true)
+ end
- # def is_unique
- # end
+ # Get mask of all unique values.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
+ # s.is_unique
+ # # =>
+ # # shape: (4,)
+ # # Series: 'a' [bool]
+ # # [
+ # # true
+ # # false
+ # # false
+ # # true
+ # # ]
+ def is_unique
+ super
+ end
- # def is_first
- # end
+ # Get a mask of the first unique value.
+ #
+ # @return [Series]
+ def is_first
+ super
+ end
- # def is_duplicated
- # end
+ # Get mask of all duplicated values.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
+ # s.is_duplicated
+ # # =>
+ # # shape: (4,)
+ # # Series: 'a' [bool]
+ # # [
+ # # false
+ # # true
+ # # true
+ # # false
+ # # ]
+ def is_duplicated
+ super
+ end
- # def explode
- # end
+ # Explode a list or utf8 Series.
+ #
+ # This means that every item is expanded to a new row.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [[1, 2], [3, 4], [9, 10]])
+ # s.explode
+ # # =>
+ # # shape: (6,)
+ # # Series: 'a' [i64]
+ # # [
+ # # 1
+ # # 2
+ # # 3
+ # # 4
+ # # 9
+ # # 10
+ # # ]
+ def explode
+ super
+ end
# Check if series is equal with another Series.
#
# @param other [Series]
# Series to compare with.
@@ -1023,12 +1561,33 @@
def len
_s.len
end
alias_method :length, :len
- # def cast
- # end
+ # Cast between data types.
+ #
+ # @param dtype [Symbol]
+ # DataType to cast to
+ # @param strict [Boolean]
+ # Throw an error if a cast could not be done, for instance due to an overflow.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [true, false, true])
+ # s.cast(:u32)
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [u32]
+ # # [
+ # # 1
+ # # 0
+ # # 1
+ # # ]
+ def cast(dtype, strict: true)
+ super
+ end
# def to_physical
# end
# Convert this Series to a Ruby Array. This operation clones data.
@@ -1052,12 +1611,28 @@
def rechunk(in_place: false)
opt_s = _s.rechunk(in_place)
in_place ? self : Utils.wrap_s(opt_s)
end
- # def reverse
- # end
+ # Return Series in reverse order.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 3], dtype: :i8)
+ # s.reverse
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [i8]
+ # # [
+ # # 3
+ # # 2
+ # # 1
+ # # ]
+ def reverse
+ super
+ end
# Check if this Series datatype is numeric.
#
# @return [Boolean]
#
@@ -1130,23 +1705,147 @@
# end
# def set
# end
- # def set_at_idx
- # end
+ # Set values at the index locations.
+ #
+ # @param idx [Object]
+ # Integers representing the index locations.
+ # @param value [Object]
+ # Replacement values.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 3])
+ # s.set_at_idx(1, 10)
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [i64]
+ # # [
+ # # 1
+ # # 10
+ # # 3
+ # # ]
+ def set_at_idx(idx, value)
+ if idx.is_a?(Integer)
+ idx = [idx]
+ end
+ if idx.length == 0
+ return self
+ end
- # def cleared
- # end
+ idx = Series.new("", idx)
+ if value.is_a?(Integer) || value.is_a?(Float) || Utils.bool?(value) || value.is_a?(String) || value.nil?
+ value = Series.new("", [value])
+ # if we need to set more than a single value, we extend it
+ if idx.length > 0
+ value = value.extend_constant(value[0], idx.length - 1)
+ end
+ elsif !value.is_a?(Series)
+ value = Series.new("", value)
+ end
+ _s.set_at_idx(idx._s, value._s)
+ self
+ end
+
+ # Create an empty copy of the current Series.
+ #
+ # The copy has identical name/dtype but no data.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [nil, true, false])
+ # s.cleared
+ # # =>
+ # # shape: (0,)
+ # # Series: 'a' [bool]
+ # # [
+ # # ]
+ def cleared
+ len > 0 ? limit(0) : clone
+ end
+
# clone handled by initialize_copy
- # def fill_nan
- # end
+ # Fill floating point NaN value with a fill value.
+ #
+ # @param fill_value [Object]
+ # Value used to fill nan values.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1.0, 2.0, 3.0, Float::NAN])
+ # s.fill_nan(0)
+ # # =>
+ # # shape: (4,)
+ # # Series: 'a' [f64]
+ # # [
+ # # 1.0
+ # # 2.0
+ # # 3.0
+ # # 0.0
+ # # ]
+ def fill_nan(fill_value)
+ super
+ end
- # def fill_null
- # end
+ # Fill null values using the specified value or strategy.
+ #
+ # @param value [Object]
+ # Value used to fill null values.
+ # @param strategy [nil, "forward", "backward", "min", "max", "mean", "zero", "one"]
+ # Strategy used to fill null values.
+ # @param limit [Integer, nil]
+ # Number of consecutive null values to fill when using the "forward" or
+ # "backward" strategy.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 3, nil])
+ # s.fill_null(strategy: "forward")
+ # # =>
+ # # shape: (4,)
+ # # Series: 'a' [i64]
+ # # [
+ # # 1
+ # # 2
+ # # 3
+ # # 3
+ # # ]
+ #
+ # @example
+ # s.fill_null(strategy: "min")
+ # # =>
+ # # shape: (4,)
+ # # Series: 'a' [i64]
+ # # [
+ # # 1
+ # # 2
+ # # 3
+ # # 1
+ # # ]
+ #
+ # @example
+ # s = Polars::Series.new("b", ["x", nil, "z"])
+ # s.fill_null(Polars.lit(""))
+ # # =>
+ # # shape: (3,)
+ # # Series: 'b' [str]
+ # # [
+ # # "x"
+ # # ""
+ # # "z"
+ # # ]
+ def fill_null(value = nil, strategy: nil, limit: nil)
+ super
+ end
# Rounds down to the nearest integer value.
#
# Only works on floating point Series.
#
@@ -1183,11 +1882,11 @@
# # 2.0
# # 3.0
# # 4.0
# # ]
def ceil
- Utils.wrap_s(_s.ceil)
+ super
end
# Round underlying floating point data by `decimals` digits.
#
# @param decimals [Integer]
@@ -1205,69 +1904,380 @@
# # 1.12
# # 2.57
# # 3.9
# # ]
def round(decimals = 0)
- Utils.wrap_s(_s.round(decimals))
+ super
end
# def dot
# end
- # def mode
- # end
+ # Compute the most occurring value(s).
+ #
+ # Can return multiple values.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 2, 3])
+ # s.mode
+ # # =>
+ # # shape: (1,)
+ # # Series: 'a' [i64]
+ # # [
+ # # 2
+ # # ]
+ def mode
+ super
+ end
- # def sign
- # end
+ # Compute the element-wise indication of the sign.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [-9.0, -0.0, 0.0, 4.0, nil])
+ # s.sign
+ # # =>
+ # # shape: (5,)
+ # # Series: 'a' [i64]
+ # # [
+ # # -1
+ # # 0
+ # # 0
+ # # 1
+ # # null
+ # # ]
+ def sign
+ super
+ end
- # def sin
- # end
+ # Compute the element-wise value for the sine.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
+ # s.sin
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [f64]
+ # # [
+ # # 0.0
+ # # 1.0
+ # # 1.2246e-16
+ # # ]
+ def sin
+ super
+ end
- # def cos
- # end
+ # Compute the element-wise value for the cosine.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
+ # s.cos
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [f64]
+ # # [
+ # # 1.0
+ # # 6.1232e-17
+ # # -1.0
+ # # ]
+ def cos
+ super
+ end
- # def tan
- # end
+ # Compute the element-wise value for the tangent.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [0.0, Math::PI / 2.0, Math::PI])
+ # s.tan
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [f64]
+ # # [
+ # # 0.0
+ # # 1.6331e16
+ # # -1.2246e-16
+ # # ]
+ def tan
+ super
+ end
- # def arcsin
- # end
+ # Compute the element-wise value for the inverse sine.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
+ # s.arcsin
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [f64]
+ # # [
+ # # 1.570796
+ # # 0.0
+ # # -1.570796
+ # # ]
+ def arcsin
+ super
+ end
- # def arccos
- # end
+ # Compute the element-wise value for the inverse cosine.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
+ # s.arccos
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [f64]
+ # # [
+ # # 0.0
+ # # 1.570796
+ # # 3.141593
+ # # ]
+ def arccos
+ super
+ end
- # def arctan
- # end
+ # Compute the element-wise value for the inverse tangent.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
+ # s.arctan
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [f64]
+ # # [
+ # # 0.785398
+ # # 0.0
+ # # -0.785398
+ # # ]
+ def arctan
+ super
+ end
- # def arcsinh
- # end
+ # Compute the element-wise value for the inverse hyperbolic sine.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
+ # s.arcsinh
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [f64]
+ # # [
+ # # 0.881374
+ # # 0.0
+ # # -0.881374
+ # # ]
+ def arcsinh
+ super
+ end
- # def arccosh
- # end
+ # Compute the element-wise value for the inverse hyperbolic cosine.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [5.0, 1.0, 0.0, -1.0])
+ # s.arccosh
+ # # =>
+ # # shape: (4,)
+ # # Series: 'a' [f64]
+ # # [
+ # # 2.292432
+ # # 0.0
+ # # NaN
+ # # NaN
+ # # ]
+ def arccosh
+ super
+ end
- # def arctanh
- # end
+ # Compute the element-wise value for the inverse hyperbolic tangent.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [2.0, 1.0, 0.5, 0.0, -0.5, -1.0, -1.1])
+ # s.arctanh
+ # # =>
+ # # shape: (7,)
+ # # Series: 'a' [f64]
+ # # [
+ # # NaN
+ # # inf
+ # # 0.549306
+ # # 0.0
+ # # -0.549306
+ # # -inf
+ # # NaN
+ # # ]
+ def arctanh
+ super
+ end
- # def sinh
- # end
+ # Compute the element-wise value for the hyperbolic sine.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
+ # s.sinh
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [f64]
+ # # [
+ # # 1.175201
+ # # 0.0
+ # # -1.175201
+ # # ]
+ def sinh
+ super
+ end
- # def cosh
- # end
+ # Compute the element-wise value for the hyperbolic cosine.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
+ # s.cosh
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [f64]
+ # # [
+ # # 1.543081
+ # # 1.0
+ # # 1.543081
+ # # ]
+ def cosh
+ super
+ end
- # def tanh
- # end
+ # Compute the element-wise value for the hyperbolic tangent.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1.0, 0.0, -1.0])
+ # s.tanh
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [f64]
+ # # [
+ # # 0.761594
+ # # 0.0
+ # # -0.761594
+ # # ]
+ def tanh
+ super
+ end
# def apply
# end
- # def shift
- # end
+ # Shift the values by a given period.
+ #
+ # @param periods [Integer]
+ # Number of places to shift (may be negative).
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 3])
+ # s.shift(1)
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [i64]
+ # # [
+ # # null
+ # # 1
+ # # 2
+ # # ]
+ #
+ # @example
+ # s.shift(-1)
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [i64]
+ # # [
+ # # 2
+ # # 3
+ # # null
+ # # ]
+ def shift(periods = 1)
+ super
+ end
- # def shift_and_fill
- # end
+ # Shift the values by a given period and fill the resulting null values.
+ #
+ # @param periods [Integer]
+ # Number of places to shift (may be negative).
+ # @param fill_value [Object]
+ # Fill the resulting null values with the result of this expression.
+ #
+ # @return [Series]
+ def shift_and_fill(periods, fill_value)
+ super
+ end
- # def zip_with
- # end
+ # Take values from self or other based on the given mask.
+ #
+ # Where mask evaluates true, take values from self. Where mask evaluates false,
+ # take values from other.
+ #
+ # @param mask [Series]
+ # Boolean Series.
+ # @param other [Series]
+ # Series of same type.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s1 = Polars::Series.new([1, 2, 3, 4, 5])
+ # s2 = Polars::Series.new([5, 4, 3, 2, 1])
+ # s1.zip_with(s1 < s2, s2)
+ # # =>
+ # # shape: (5,)
+ # # Series: '' [i64]
+ # # [
+ # # 1
+ # # 2
+ # # 3
+ # # 2
+ # # 1
+ # # ]
+ #
+ # @example
+ # mask = Polars::Series.new([true, false, true, false, true])
+ # s1.zip_with(mask, s2)
+ # # =>
+ # # shape: (5,)
+ # # Series: '' [i64]
+ # # [
+ # # 1
+ # # 4
+ # # 3
+ # # 2
+ # # 5
+ # # ]
+ def zip_with(mask, other)
+ Utils.wrap_s(_s.zip_with(mask._s, other._s))
+ end
# def rolling_min
# end
# def rolling_max
@@ -1352,66 +2362,252 @@
# # => 3
def n_unique
_s.n_unique
end
- # def shrink_to_fit
- # end
+ # Shrink Series memory usage.
+ #
+ # Shrinks the underlying array capacity to exactly fit the actual data.
+ # (Note that this function does not change the Series data type).
+ #
+ # @return [Series]
+ def shrink_to_fit(in_place: false)
+ if in_place
+ _s.shrink_to_fit
+ self
+ else
+ series = clone
+ series._s.shrink_to_fit
+ series
+ end
+ end
# def _hash
# end
- # def reinterpret
- # end
+ # Reinterpret the underlying bits as a signed/unsigned integer.
+ #
+ # This operation is only allowed for 64-bit integers. For integers with fewer
+ # bits, you can safely use the cast operation instead.
+ #
+ # @param signed [Boolean]
+ # If true, reinterpret as `:i64`. Otherwise, reinterpret as `:u64`.
+ #
+ # @return [Series]
+ def reinterpret(signed: true)
+ super
+ end
- # def interpolate
- # end
+ # Interpolate intermediate values. The interpolation method is linear.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, nil, nil, 5])
+ # s.interpolate
+ # # =>
+ # # shape: (5,)
+ # # Series: 'a' [i64]
+ # # [
+ # # 1
+ # # 2
+ # # 3
+ # # 4
+ # # 5
+ # # ]
+ def interpolate
+ super
+ end
- # def abs
- # end
+ # Compute absolute values.
+ #
+ # @return [Series]
+ def abs
+ super
+ end
# def rank
# end
- # def diff
- # end
+ # Calculate the n-th discrete difference.
+ #
+ # @param n [Integer]
+ # Number of slots to shift.
+ # @param null_behavior ["ignore", "drop"]
+ # How to handle null values.
+ #
+ # @return [Series]
+ def diff(n: 1, null_behavior: "ignore")
+ super
+ end
# def pct_change
# end
- # def skew
- # end
+ # Compute the sample skewness of a data set.
+ #
+ # For normally distributed data, the skewness should be about zero. For
+ # unimodal continuous distributions, a skewness value greater than zero means
+ # that there is more weight in the right tail of the distribution. The
+ # function `skewtest` can be used to determine if the skewness value
+ # is close enough to zero, statistically speaking.
+ #
+ # @param bias [Boolean]
+ # If `false`, the calculations are corrected for statistical bias.
+ #
+ # @return [Float, nil]
+ def skew(bias: true)
+ _s.skew(bias)
+ end
- # def kurtosis
- # end
+ # Compute the kurtosis (Fisher or Pearson) of a dataset.
+ #
+ # Kurtosis is the fourth central moment divided by the square of the
+ # variance. If Fisher's definition is used, then 3.0 is subtracted from
+ # the result to give 0.0 for a normal distribution.
+ # If bias is false, then the kurtosis is calculated using k statistics to
+ # eliminate bias coming from biased moment estimators
+ #
+ # @param fisher [Boolean]
+ # If `true`, Fisher's definition is used (normal ==> 0.0). If `false`,
+ # Pearson's definition is used (normal ==> 3.0).
+ # @param bias [Boolean]
+ # If `false`, the calculations are corrected for statistical bias.
+ #
+ # @return [Float, nil]
+ def kurtosis(fisher: true, bias: true)
+ _s.kurtosis(fisher, bias)
+ end
- # def clip
- # end
+ # Clip (limit) the values in an array to a `min` and `max` boundary.
+ #
+ # Only works for numerical types.
+ #
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
+ # expression. See {#when} for more information.
+ #
+ # @param min_val [Numeric]
+ # Minimum value.
+ # @param max_val [Numeric]
+ # Maximum value.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("foo", [-50, 5, nil, 50])
+ # s.clip(1, 10)
+ # # =>
+ # # shape: (4,)
+ # # Series: 'foo' [i64]
+ # # [
+ # # 1
+ # # 5
+ # # null
+ # # 10
+ # # ]
+ def clip(min_val, max_val)
+ super
+ end
- # def clip_min
- # end
+ # Clip (limit) the values in an array to a `min` boundary.
+ #
+ # Only works for numerical types.
+ #
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
+ # expression. See {#when} for more information.
+ #
+ # @param min_val [Numeric]
+ # Minimum value.
+ #
+ # @return [Series]
+ def clip_min(min_val)
+ super
+ end
- # def clip_max
- # end
+ # Clip (limit) the values in an array to a `max` boundary.
+ #
+ # Only works for numerical types.
+ #
+ # If you want to clip other dtypes, consider writing a "when, then, otherwise"
+ # expression. See {#when} for more information.
+ #
+ # @param max_val [Numeric]
+ # Maximum value.
+ #
+ # @return [Series]
+ def clip_max(max_val)
+ super
+ end
- # def reshape
- # end
+ # Reshape this Series to a flat Series or a Series of Lists.
+ #
+ # @param dims [Array]
+ # Tuple of the dimension sizes. If a -1 is used in any of the dimensions, that
+ # dimension is inferred.
+ #
+ # @return [Series]
+ def reshape(dims)
+ super
+ end
- # def shuffle
- # end
+ # Shuffle the contents of this Series.
+ #
+ # @param seed [Integer, nil]
+ # Seed for the random number generator.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 3])
+ # s.shuffle(seed: 1)
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [i64]
+ # # [
+ # # 2
+ # # 1
+ # # 3
+ # # ]
+ def shuffle(seed: nil)
+ super
+ end
# def ewm_mean
# end
# def ewm_std
# end
# def ewm_var
# end
- # def extend_constant
- # end
+ # Extend the Series with given number of values.
+ #
+ # @param value [Object]
+ # The value to extend the Series with. This value may be `nil` to fill with
+ # nulls.
+ # @param n [Integer]
+ # The number of values to extend.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 3])
+ # s.extend_constant(99, 2)
+ # # =>
+ # # shape: (5,)
+ # # Series: 'a' [i64]
+ # # [
+ # # 1
+ # # 2
+ # # 3
+ # # 99
+ # # 99
+ # # ]
+ def extend_constant(value, n)
+ super
+ end
# Flags the Series as sorted.
#
# Enables downstream code to use fast paths for sorted arrays.
#
@@ -1430,15 +2626,26 @@
# # => 3
def set_sorted(reverse: false)
# Forwards `reverse` positionally to the native `_s.set_sorted`; the result is
# passed through `Utils.wrap_s` before being returned.
Utils.wrap_s(_s.set_sorted(reverse))
end
- # def new_from_index
- # end
+ # Create a new Series filled with values from the given index.
+ # Both arguments go straight to the native `_s.new_from_index`; presumably `index` is the position whose value is repeated and `length` is the size of the result (TODO confirm).
+ # @return [Series]
+ def new_from_index(index, length)
+ Utils.wrap_s(_s.new_from_index(index, length))
+ end
- # def shrink_dtype
- # end
+ # Shrink numeric columns to the minimal required datatype.
+ #
+ # Shrink to the dtype needed to fit the extrema of this Series.
+ # This can be used to reduce memory pressure.
+ #
+ # @return [Series]
+ def shrink_dtype
+ super # bare super (no arguments here) goes up the ancestor chain; Series includes ExprDispatch, which presumably handles it (not visible here - confirm)
+ end
# def arr
# end
# def cat
@@ -1458,31 +2665,69 @@
# Copy hook that Ruby invokes on the new object during `dup` / `clone`.
#
# After the default copy behaviour runs, the copied `_s` reference is replaced
# with the result of `_s._clone`, so the copy no longer holds the very same
# `_s` object as the original.
#
# @param other [Series]
#   The object being copied.
def initialize_copy(other)
super
# Swap the shared reference for a clone of the native series.
self._s = _s._clone
end
+ def coerce(other) # Ruby's numeric coercion hook: lets `numeric op series` (e.g. `1 + series`) work by returning [other as a Series, self]
+ if other.is_a?(Numeric)
+ # TODO improve - currently builds the Series by selecting a literal of `other` against this Series' frame
+ series = to_frame.select(Polars.lit(other)).to_series
+ [series, self]
+ else
+ raise TypeError, "#{self.class} can't be coerced into #{other.class}"
+ end
+ end
+
+ def _comp(other, op) # Shared helper behind ==, !=, >, <, >=, <= (op is :eq, :neq, :gt, :lt, :gt_eq or :lt_eq); a non-Series `other` is sent to a native method named "<op>_<dtype>"
+ if other.is_a?(Series)
+ return Utils.wrap_s(_s.send(op, other._s)) # Series vs Series: call the native method named by op directly
+ end
+
+ if dtype == :str # comparing a string Series with a non-Series value is not implemented yet
+ raise Todo
+ end
+ Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
+ end
+
+ def _arithmetic(other, op) # Shared helper behind the arithmetic operators (e.g. + passes :add, - passes :sub); op names the native method to call on _s
+ if other.is_a?(Expr)
+ other = to_frame.select(other).to_series # evaluate the expression against this Series' frame so it can be handled as a Series below
+ end
+ if other.is_a?(Series)
+ return Utils.wrap_s(_s.send(op, other._s))
+ end
+
+ raise Todo # any other operand (e.g. a plain number) is not supported yet
+ end
+
+ def series_to_rbseries(name, values) # Used by the constructor when `values` is already a Series: renames it to `name` and returns its native `_s`
+ # should not be in-place? NOTE(review): with in_place: true this appears to rename the caller's Series as a side effect, and the returned _s is the same object that Series still holds - confirm whether a copy is intended
+ values.rename(name, in_place: true)
+ values._s
+ end
+
def sequence_to_rbseries(name, values, dtype: nil, strict: true, dtype_if_empty: nil)
ruby_dtype = nil
+ nested_dtype = nil
if (values.nil? || values.empty?) && dtype.nil?
if dtype_if_empty
# if dtype for empty sequence could be guessed
# (e.g comparisons between self and other)
dtype = dtype_if_empty
else
# default to Float32 type
- dtype = "f32"
+ dtype = :f32
end
end
rb_temporal_types = []
rb_temporal_types << Date if defined?(Date)
rb_temporal_types << DateTime if defined?(DateTime)
rb_temporal_types << Time if defined?(Time)
- # _get_first_non_none
- value = values.find { |v| !v.nil? }
+ value = _get_first_non_none(values)
if !dtype.nil? && Utils.is_polars_dtype(dtype) && ruby_dtype.nil?
constructor = polars_type_to_constructor(dtype)
rbseries = constructor.call(name, values, strict)
return rbseries
@@ -1502,10 +2747,25 @@
# dtype = rb_type_to_dtype(ruby_dtype)
# elsif rb_temporal_types.include?(dtype)
# dtype = rb_type_to_dtype(dtype)
# end
+ if ruby_dtype == Date
+ RbSeries.new_opt_date(name, values, strict)
+ else
+ raise Todo
+ end
+ elsif ruby_dtype == Array
+ if nested_dtype.nil?
+ nested_value = _get_first_non_none(value)
+ nested_dtype = nested_value.nil? ? Float : nested_value.class
+ end
+
+ if nested_dtype == Array
+ raise Todo
+ end
+
raise Todo
else
constructor = rb_type_to_constructor(value.class)
constructor.call(name, values, strict)
end
@@ -1544,8 +2804,12 @@
# Look up the constructor registered in RB_TYPE_TO_CONSTRUCTOR for a Ruby class.
#
# @param dtype [Class]
#   Lookup key; `sequence_to_rbseries` passes `value.class` here.
#
# @return [Object]
#   The registered entry; the caller invokes it with `.call(name, values, strict)`.
#
# @raise [ArgumentError]
#   If RB_TYPE_TO_CONSTRUCTOR has no entry for `dtype`.
def rb_type_to_constructor(dtype)
RB_TYPE_TO_CONSTRUCTOR.fetch(dtype)
rescue KeyError
# Commented-out fallback, currently disabled: RbSeries.method(:new_object)
raise ArgumentError, "Cannot determine type"
+ end
+
+ def _get_first_non_none(values) # Returns the first element of `values` that is not nil, or nil when there is none
+ values.find { |v| !v.nil? } # `values` must respond to `find`; passing nil raises NoMethodError
end
end
end