lib/polars/series.rb in polars-df-0.1.4 vs lib/polars/series.rb in polars-df-0.1.5

- old
+ new

@@ -21,11 +21,11 @@ # set the Polars dtype of the Series data. If not specified, Float32 is used. # # @example Constructing a Series by specifying name and values positionally: # s = Polars::Series.new("a", [1, 2, 3]) # - # @example Notice that the dtype is automatically inferred as a polars Int64: + # @example Notice that the dtype is automatically inferred as a polars `:i64`: # s.dtype # # => :i64 # # @example Constructing a Series with a specific dtype: # s2 = Polars::Series.new("a", [1, 2, 3], dtype: :f32) @@ -43,10 +43,15 @@ end end name = "" if name.nil? + # TODO improve + if values.is_a?(Range) && values.begin.is_a?(String) + values = values.to_a + end + if values.nil? self._s = sequence_to_rbseries(name, [], dtype: dtype, dtype_if_empty: dtype_if_empty) elsif values.is_a?(Series) self._s = series_to_rbseries(name, values) elsif values.is_a?(Range) @@ -1666,12 +1671,36 @@ # # ] def cast(dtype, strict: true) super end - # def to_physical - # end + # Cast to physical representation of the logical dtype. + # + # - `:date` -> `:i32` + # - `:datetime` -> `:i64` + # - `:time` -> `:i64` + # - `:duration` -> `:i64` + # - `:cat` -> `:u32` + # - other data types will be left unchanged. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("values", ["a", nil, "x", "a"]) + # s.cast(:cat).to_physical + # # => + # # shape: (4,) + # # Series: 'values' [u32] + # # [ + # # 0 + # # null + # # 1 + # # 0 + # # ] + def to_physical + super + end # Convert this Series to a Ruby Array. This operation clones data. # # @return [Array] # @@ -1783,12 +1812,38 @@ # end # def to_numo # end - # def set - # end + # Set masked values. + # + # @param filter [Series] + # Boolean mask. + # @param value [Object] + # Value with which to replace the masked values. + # + # @return [Series] + # + # @note + # Use of this function is frequently an anti-pattern, as it can + # block optimization (predicate pushdown, etc). 
Consider using + # `Polars.when(predicate).then(value).otherwise(self)` instead. + # + # @example + # s = Polars::Series.new("a", [1, 2, 3]) + # s.set(s == 2, 10) + # # => + # # shape: (3,) + # # Series: 'a' [i64] + # # [ + # # 1 + # # 10 + # # 3 + # # ] + def set(filter, value) + Utils.wrap_s(_s.send("set_with_mask_#{dtype}", filter._s, value)) + end # Set values at the index locations. # # @param idx [Object] # Integers representing the index locations. @@ -2284,12 +2339,45 @@ # # ] def tanh super end - # def apply - # end + # Apply a custom/user-defined function (UDF) over elements in this Series and + # return a new Series. + # + # If the function returns another datatype, the return_dtype arg should be set, + # otherwise the method will fail. + # + # @param return_dtype [Symbol] + # Output datatype. If none is given, the same datatype as this Series will be + # used. + # @param skip_nulls [Boolean] + # Nulls will be skipped and not passed to the Ruby function. + # This is faster because Ruby can be skipped and because we call + # more specialized functions. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 3]) + # s.apply { |x| x + 10 } + # # => + # # shape: (3,) + # # Series: 'a' [i64] + # # [ + # # 11 + # # 12 + # # 13 + # # ] + def apply(return_dtype: nil, skip_nulls: true, &func) + if return_dtype.nil? + pl_return_dtype = nil + else + pl_return_dtype = Utils.rb_type_to_dtype(return_dtype) + end + Utils.wrap_s(_s.apply_lambda(func, pl_return_dtype, skip_nulls)) + end # Shift the values by a given period. # # @param periods [Integer] # Number of places to shift (may be negative). @@ -2951,12 +3039,39 @@ series._s.shrink_to_fit series end end - # def _hash - # end + # Hash the Series. + # + # The hash value is of type `:u64`. + # + # @param seed [Integer] + # Random seed parameter. Defaults to 0. + # @param seed_1 [Integer] + # Random seed parameter. Defaults to `seed` if not set. 
+ # @param seed_2 [Integer] + # Random seed parameter. Defaults to `seed` if not set. + # @param seed_3 [Integer] + # Random seed parameter. Defaults to `seed` if not set. + # + # @return [Series] + # + # @example + # s = Polars::Series.new("a", [1, 2, 3]) + # s._hash(42) + # # => + # # shape: (3,) + # # Series: 'a' [u64] + # # [ + # # 2374023516666777365 + # # 10386026231460783898 + # # 17796317186427479491 + # # ] + def _hash(seed = 0, seed_1 = nil, seed_2 = nil, seed_3 = nil) + super + end # Reinterpret the underlying bits as a signed/unsigned integer. # # This operation is only allowed for 64-bit integers. For integers with fewer bits, # you can safely use the cast operation. @@ -3422,10 +3537,17 @@ end if other.is_a?(Series) return Utils.wrap_s(_s.send(op, other._s)) end - raise Todo + if other.is_a?(Date) || other.is_a?(DateTime) || other.is_a?(Time) || other.is_a?(String) + raise Todo + end + if other.is_a?(Float) && !is_float + raise Todo + end + + Utils.wrap_s(_s.send("#{op}_#{dtype}", other)) end def series_to_rbseries(name, values) # should not be in-place? values.rename(name, in_place: true)