lib/polars/series.rb in polars-df-0.1.4 vs lib/polars/series.rb in polars-df-0.1.5
- old
+ new
@@ -21,11 +21,11 @@
# set the Polars dtype of the Series data. If not specified, Float32 is used.
#
# @example Constructing a Series by specifying name and values positionally:
# s = Polars::Series.new("a", [1, 2, 3])
#
- # @example Notice that the dtype is automatically inferred as a polars Int64:
+ # @example Notice that the dtype is automatically inferred as a polars `:i64`:
# s.dtype
# # => :i64
#
# @example Constructing a Series with a specific dtype:
# s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32)
@@ -43,10 +43,15 @@
end
end
name = "" if name.nil?
+ # TODO improve
+ if values.is_a?(Range) && values.begin.is_a?(String)
+ values = values.to_a
+ end
+
if values.nil?
self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty)
elsif values.is_a?(Series)
self._s = series_to_rbseries(name, values)
elsif values.is_a?(Range)
@@ -1666,12 +1671,36 @@
# # ]
def cast(dtype, strict: true)
super # bare `super` forwards dtype and strict: unchanged to the next `cast` in the ancestor chain
end
- # def to_physical
- # end
+ # Cast to physical representation of the logical dtype.
+ #
+ # - `:date` -> `:i32`
+ # - `:datetime` -> `:i64`
+ # - `:time` -> `:i64`
+ # - `:duration` -> `:i64`
+ # - `:cat` -> `:u32`
+ # - other data types will be left unchanged.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("values", ["a", nil, "x", "a"])
+ # s.cast(:cat).to_physical
+ # # =>
+ # # shape: (4,)
+ # # Series: 'values' [u32]
+ # # [
+ # # 0
+ # # null
+ # # 1
+ # # 0
+ # # ]
+ def to_physical
+ super # bare `super`: delegates to the next `to_physical` in the ancestor chain (no arguments to forward)
+ end
# Convert this Series to a Ruby Array. This operation clones data.
#
# @return [Array]
#
@@ -1783,12 +1812,38 @@
# end
# def to_numo
# end
- # def set
- # end
+ # Set masked values.
+ #
+ # @param filter [Series]
+ # Boolean mask.
+ # @param value [Object]
+ # Value with which to replace the masked values.
+ #
+ # @return [Series]
+ #
+ # @note
+ # Use of this function is frequently an anti-pattern, as it can
+ # block optimization (predicate pushdown, etc). Consider using
+ # `Polars.when(predicate).then(value).otherwise(self)` instead.
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 3])
+ # s.set(s == 2, 10)
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [i64]
+ # # [
+ # # 1
+ # # 10
+ # # 3
+ # # ]
+ def set(filter, value)
+ Utils.wrap_s(_s.send("set_with_mask_#{dtype}", filter._s, value))
+ end
# Set values at the index locations.
#
# @param idx [Object]
# Integers representing the index locations.
@@ -2284,12 +2339,45 @@
# # ]
def tanh
super
end
- # def apply
- # end
+ # Apply a custom/user-defined function (UDF) over elements in this Series and
+ # return a new Series.
+ #
+ # If the function returns another datatype, the return_dtype arg should be set,
+ # otherwise the method will fail.
+ #
+ # @param return_dtype [Symbol]
+ # Output datatype. If none is given, the same datatype as this Series will be
+ # used.
+ # @param skip_nulls [Boolean]
+ # Nulls will be skipped and not passed to the Ruby function.
+ # This is faster because Ruby can be skipped and because we call
+ # more specialized functions.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 3])
+ # s.apply { |x| x + 10 }
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [i64]
+ # # [
+ # # 11
+ # # 12
+ # # 13
+ # # ]
+ def apply(return_dtype: nil, skip_nulls: true, &func)
+ if return_dtype.nil?
+ pl_return_dtype = nil
+ else
+ pl_return_dtype = Utils.rb_type_to_dtype(return_dtype)
+ end
+ Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls))
+ end
# Shift the values by a given period.
#
# @param periods [Integer]
# Number of places to shift (may be negative).
@@ -2951,12 +3039,39 @@
series._s.shrink_to_fit
series
end
end
- # def _hash
- # end
+ # Hash the Series.
+ #
+ # The hash value is of type `:u64`.
+ #
+ # @param seed [Integer]
+ # Random seed parameter. Defaults to 0.
+ # @param seed_1 [Integer]
+ # Random seed parameter. Defaults to `seed` if not set.
+ # @param seed_2 [Integer]
+ # Random seed parameter. Defaults to `seed` if not set.
+ # @param seed_3 [Integer]
+ # Random seed parameter. Defaults to `seed` if not set.
+ #
+ # @return [Series]
+ #
+ # @example
+ # s = Polars::Series.new("a", [1, 2, 3])
+ # s._hash(42)
+ # # =>
+ # # shape: (3,)
+ # # Series: 'a' [u64]
+ # # [
+ # # 2374023516666777365
+ # # 10386026231460783898
+ # # 17796317186427479491
+ # # ]
+ def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil)
+ super # bare `super` forwards seed, seed_1, seed_2 and seed_3 as received to the next `_hash` in the ancestor chain
+ end
# Reinterpret the underlying bits as a signed/unsigned integer.
#
# This operation is only allowed for 64-bit integers. For integers with fewer bits,
# you can safely use the cast operation.
@@ -3422,10 +3537,17 @@
end
if other.is_a?(Series)
return Utils.wrap_s(_s.send(op, other._s))
end
- raise Todo
+ if other.is_a?(Date) || other.is_a?(DateTime) || other.is_a?(Time) || other.is_a?(String)
+ raise Todo
+ end
+ if other.is_a?(Float) && !is_float
+ raise Todo
+ end
+
+ Utils.wrap_s(_s.send("#{op}_#{dtype}", other))
end
def series_to_rbseries(name, values)
# should not be in-place?
values.rename(name, in_place: true)