ext/polars/src/series.rs in polars-df-0.1.0 vs ext/polars/src/series.rs in polars-df-0.1.1

- old
+ new

// Collapsed diff listing of `RbSeries` (not compilable Rust as-is): every `@@ -a,b +c,d @@`
// marker opens a hunk whose original line breaks were flattened into spaces. Text following
// a `-` was removed, text following a `+` was added, and unmarked text is unchanged context.
// Context at the hunk edges is cut mid-definition (e.g. `new_opt_bool` stops at `unsafe {`),
// so those fragments are deliberately not documented here.
//
// Hunk 1 (-1,13 +1,14): replaces `use crate::conversion::wrap` with the glob import
//   `crate::conversion::*`, adds `RbValueError` to the crate import, and moves both crate
//   imports below the external `magnus` / `polars` / `std` imports.
// Hunk 2 (-22,10 +23,18): adds `is_sorted_flag` and `is_sorted_reverse_flag`, which return
//   whether `is_sorted()` matches `IsSorted::Ascending` / `IsSorted::Descending`.
// Hunk 3 (-112,20 +121,47): adds `estimated_size`, `get_fmt` and `get_idx`.
//   `get_fmt` formats the value at `index`; for `Utf8` / `Categorical` dtypes it keeps at most
//   `str_lengths` characters of that text (the cut is computed from `char_indices`, so it lands
//   on a char boundary) and appends "..." only when the kept prefix differs from the full text.
//   `get_idx` converts the value at `idx` through `Wrap(..).into()`.
// Hunk 4 (-194,19 +230,19): `max`, `min` and `sum` now return `Wrap(..).into()` instead of
//   calling `wrap(..)`.
// Hunk 5 (-452,10 +488,115): adds `quantile`, `clone`, `zip_with`, `to_dummies`, `peak_max`,
//   `peak_min`, `n_unique`, `floor`, `shrink_to_fit`, `dot`, `skew`, `kurtosis`, `cast` and
//   `time_unit`.
//   - `quantile` discards the underlying error of `quantile_as_series` and reports
//     `RbValueError` "invalid quantile" instead.
//   - `zip_with` first views `mask` through `.bool()`; a failure there is returned via
//     `RbPolarsErr::from`, as is a failure of the zip itself.
//   - `cast` calls `strict_cast` when `strict` is true and `cast` otherwise.
//   - `time_unit` returns "ns" / "us" / "ms" for `Datetime` and `Duration` dtypes and `None`
//     for every other dtype.
// Hunk 6 (-466,10 +607,32): adds `cumprod`, `ceil`, `round` and the free function
//   `to_rbseries_collection`, which wraps each `Series` of the input vector in `RbSeries::new`.
//
// NOTE(review): `format!("{}", ..)` in `get_fmt` and the closure `|v| RbSeries::new(v)` in
//   `to_rbseries_collection` look like candidates for `.to_string()` / `map(RbSeries::new)`;
//   confirm against the real source file before changing, since this listing is only a diff.
@@ -1,13 +1,14 @@ -use crate::conversion::wrap; -use crate::{RbDataFrame, RbPolarsErr, RbResult}; use magnus::exception::arg_error; use magnus::{Error, RArray, Value}; use polars::prelude::*; use polars::series::IsSorted; use std::cell::RefCell; +use crate::conversion::*; +use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError}; + #[magnus::wrap(class = "Polars::RbSeries")] pub struct RbSeries { pub series: RefCell<Series>, } @@ -22,10 +23,18 @@ RbSeries { series: RefCell::new(series), } } + pub fn is_sorted_flag(&self) -> bool { + matches!(self.series.borrow().is_sorted(), IsSorted::Ascending) + } + + pub fn is_sorted_reverse_flag(&self) -> bool { + matches!(self.series.borrow().is_sorted(), IsSorted::Descending) + } + pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> { let len = obj.len(); let mut builder = BooleanChunkedBuilder::new(&name, len); unsafe { @@ -112,20 +121,47 @@ let mut s = Utf8Chunked::new(&name, v).into_series(); s.rename(&name); Ok(RbSeries::new(s)) } + pub fn estimated_size(&self) -> usize { + self.series.borrow().estimated_size() + } + + pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String { + let val = format!("{}", self.series.borrow().get(index)); + if let DataType::Utf8 | DataType::Categorical(_) = self.series.borrow().dtype() { + let v_trunc = &val[..val + .char_indices() + .take(str_lengths) + .last() + .map(|(i, c)| i + c.len_utf8()) + .unwrap_or(0)]; + if val == v_trunc { + val + } else { + format!("{}...", v_trunc) + } + } else { + val + } + } + pub fn rechunk(&self, in_place: bool) -> Option<Self> { let series = self.series.borrow_mut().rechunk(); if in_place { *self.series.borrow_mut() = series; None } else { Some(series.into()) } } + pub fn get_idx(&self, idx: usize) -> Value { + Wrap(self.series.borrow().get(idx)).into() + } + pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> { let out = self .series .borrow() .bitand(&other.series.borrow()) @@ -194,19 +230,19 @@ _ => 
self.series.borrow().mean(), } } pub fn max(&self) -> Value { - wrap(self.series.borrow().max_as_series().get(0)) + Wrap(self.series.borrow().max_as_series().get(0)).into() } pub fn min(&self) -> Value { - wrap(self.series.borrow().min_as_series().get(0)) + Wrap(self.series.borrow().min_as_series().get(0)).into() } pub fn sum(&self) -> Value { - wrap(self.series.borrow().sum_as_series().get(0)) + Wrap(self.series.borrow().sum_as_series().get(0)).into() } pub fn n_chunks(&self) -> usize { self.series.borrow().n_chunks() } @@ -452,10 +488,115 @@ } _ => self.series.borrow().median(), } } + pub fn quantile( + &self, + quantile: f64, + interpolation: Wrap<QuantileInterpolOptions>, + ) -> RbResult<Value> { + Ok(Wrap( + self.series + .borrow() + .quantile_as_series(quantile, interpolation.0) + .map_err(|_| RbValueError::new_err("invalid quantile".into()))? + .get(0), + ) + .into()) + } + + pub fn clone(&self) -> Self { + RbSeries::new(self.series.borrow().clone()) + } + + pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> { + let binding = mask.series.borrow(); + let mask = binding.bool().map_err(RbPolarsErr::from)?; + let s = self + .series + .borrow() + .zip_with(mask, &other.series.borrow()) + .map_err(RbPolarsErr::from)?; + Ok(RbSeries::new(s)) + } + + pub fn to_dummies(&self) -> RbResult<RbDataFrame> { + let df = self + .series + .borrow() + .to_dummies() + .map_err(RbPolarsErr::from)?; + Ok(df.into()) + } + + pub fn peak_max(&self) -> Self { + self.series.borrow().peak_max().into_series().into() + } + + pub fn peak_min(&self) -> Self { + self.series.borrow().peak_min().into_series().into() + } + + pub fn n_unique(&self) -> RbResult<usize> { + let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?; + Ok(n) + } + + pub fn floor(&self) -> RbResult<Self> { + let s = self.series.borrow().floor().map_err(RbPolarsErr::from)?; + Ok(s.into()) + } + + pub fn shrink_to_fit(&self) { + self.series.borrow_mut().shrink_to_fit(); + } + + pub fn 
dot(&self, other: &RbSeries) -> Option<f64> { + self.series.borrow().dot(&other.series.borrow()) + } + + pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> { + let out = self.series.borrow().skew(bias).map_err(RbPolarsErr::from)?; + Ok(out) + } + + pub fn kurtosis(&self, fisher: bool, bias: bool) -> RbResult<Option<f64>> { + let out = self + .series + .borrow() + .kurtosis(fisher, bias) + .map_err(RbPolarsErr::from)?; + Ok(out) + } + + pub fn cast(&self, dtype: Wrap<DataType>, strict: bool) -> RbResult<Self> { + let dtype = dtype.0; + let out = if strict { + self.series.borrow().strict_cast(&dtype) + } else { + self.series.borrow().cast(&dtype) + }; + let out = out.map_err(RbPolarsErr::from)?; + Ok(out.into()) + } + + pub fn time_unit(&self) -> Option<String> { + if let DataType::Datetime(tu, _) | DataType::Duration(tu) = self.series.borrow().dtype() { + Some( + match tu { + TimeUnit::Nanoseconds => "ns", + TimeUnit::Microseconds => "us", + TimeUnit::Milliseconds => "ms", + } + .to_string(), + ) + } else { + None + } + } + // dispatch dynamically in future? pub fn cumsum(&self, reverse: bool) -> Self { self.series.borrow().cumsum(reverse).into() } @@ -466,10 +607,32 @@ pub fn cummin(&self, reverse: bool) -> Self { self.series.borrow().cummin(reverse).into() } + pub fn cumprod(&self, reverse: bool) -> Self { + self.series.borrow().cumprod(reverse).into() + } + pub fn slice(&self, offset: i64, length: usize) -> Self { let series = self.series.borrow().slice(offset, length); series.into() } + + pub fn ceil(&self) -> RbResult<Self> { + let s = self.series.borrow().ceil().map_err(RbPolarsErr::from)?; + Ok(s.into()) + } + + pub fn round(&self, decimals: u32) -> RbResult<Self> { + let s = self + .series + .borrow() + .round(decimals) + .map_err(RbPolarsErr::from)?; + Ok(s.into()) + } +} + +pub fn to_rbseries_collection(s: Vec<Series>) -> Vec<RbSeries> { + s.into_iter().map(|v| RbSeries::new(v)).collect() }