ext/polars/src/series.rs in polars-df-0.1.0 vs ext/polars/src/series.rs in polars-df-0.1.1
- old
+ new
@@ -1,13 +1,14 @@
-use crate::conversion::wrap;
-use crate::{RbDataFrame, RbPolarsErr, RbResult};
use magnus::exception::arg_error;
use magnus::{Error, RArray, Value};
use polars::prelude::*;
use polars::series::IsSorted;
use std::cell::RefCell;
+use crate::conversion::*;
+use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError};
+
/// Wrapper around a polars `Series`, registered through `magnus::wrap` under
/// the class name `Polars::RbSeries`.
#[magnus::wrap(class = "Polars::RbSeries")]
pub struct RbSeries {
/// The wrapped series; it sits in a `RefCell`, so methods taking `&self`
/// reach it through `borrow()` / `borrow_mut()`.
pub series: RefCell<Series>,
}
@@ -22,10 +23,18 @@
RbSeries {
series: RefCell::new(series),
}
}
+ /// Reports whether the wrapped series' `is_sorted()` result is
+ /// `IsSorted::Ascending`.
+ pub fn is_sorted_flag(&self) -> bool {
+     let flag = self.series.borrow().is_sorted();
+     matches!(flag, IsSorted::Ascending)
+ }
+
+ /// Reports whether the wrapped series' `is_sorted()` result is
+ /// `IsSorted::Descending`.
+ pub fn is_sorted_reverse_flag(&self) -> bool {
+     let flag = self.series.borrow().is_sorted();
+     matches!(flag, IsSorted::Descending)
+ }
+
pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult<RbSeries> {
let len = obj.len();
let mut builder = BooleanChunkedBuilder::new(&name, len);
unsafe {
@@ -112,20 +121,47 @@
let mut s = Utf8Chunked::new(&name, v).into_series();
s.rename(&name);
Ok(RbSeries::new(s))
}
+ /// Returns whatever `Series::estimated_size` reports for the wrapped series.
+ pub fn estimated_size(&self) -> usize {
+     let series = self.series.borrow();
+     series.estimated_size()
+ }
+
+ /// Formats the value at `index` with its `Display` implementation.
+ ///
+ /// For `Utf8` and `Categorical` series the formatted text is capped at
+ /// `str_lengths` characters (counted as `char`s, not bytes); when anything
+ /// is cut off, `...` is appended. Other dtypes are returned untouched.
+ pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String {
+     let series = self.series.borrow();
+     let val = format!("{}", series.get(index));
+
+     // Only string-like dtypes are subject to truncation.
+     if !matches!(series.dtype(), DataType::Utf8 | DataType::Categorical(_)) {
+         return val;
+     }
+
+     // Byte offset of the first character past the limit, if there is one.
+     let cut = val.char_indices().nth(str_lengths).map(|(offset, _)| offset);
+     match cut {
+         Some(offset) => format!("{}...", &val[..offset]),
+         // The text already fits within `str_lengths` characters.
+         None => val,
+     }
+ }
+
/// Rechunks the wrapped series.
///
/// With `in_place` set, the rechunked series replaces the stored one and
/// `None` is returned. Otherwise the stored series is left as it was and the
/// rechunked result is returned in a new wrapper.
pub fn rechunk(&self, in_place: bool) -> Option<Self> {
    // `Series::rechunk` takes `&self`, so a shared borrow is enough here.
    // An exclusive `borrow_mut()` would panic if any other borrow of the
    // cell were still alive at this point.
    let series = self.series.borrow().rechunk();
    if in_place {
        // The shared borrow above has already been released.
        *self.series.borrow_mut() = series;
        None
    } else {
        Some(series.into())
    }
}
+ /// Fetches the value at `idx` with `Series::get` and converts it to a
+ /// magnus `Value` through `Wrap`.
+ pub fn get_idx(&self, idx: usize) -> Value {
+     let series = self.series.borrow();
+     let value = series.get(idx);
+     Wrap(value).into()
+ }
+
pub fn bitand(&self, other: &RbSeries) -> RbResult<Self> {
let out = self
.series
.borrow()
.bitand(&other.series.borrow())
@@ -194,19 +230,19 @@
_ => self.series.borrow().mean(),
}
}
/// Returns element 0 of the series produced by `max_as_series`, converted to a magnus `Value`.
pub fn max(&self) -> Value {
- wrap(self.series.borrow().max_as_series().get(0))
+ Wrap(self.series.borrow().max_as_series().get(0)).into()
}
/// Returns element 0 of the series produced by `min_as_series`, converted to a magnus `Value`.
pub fn min(&self) -> Value {
- wrap(self.series.borrow().min_as_series().get(0))
+ Wrap(self.series.borrow().min_as_series().get(0)).into()
}
/// Returns element 0 of the series produced by `sum_as_series`, converted to a magnus `Value`.
pub fn sum(&self) -> Value {
- wrap(self.series.borrow().sum_as_series().get(0))
+ Wrap(self.series.borrow().sum_as_series().get(0)).into()
}
/// Returns the chunk count reported by `Series::n_chunks`.
pub fn n_chunks(&self) -> usize {
    let series = self.series.borrow();
    series.n_chunks()
}
@@ -452,10 +488,115 @@
}
_ => self.series.borrow().median(),
}
}
+ /// Computes a quantile with `quantile_as_series` and returns element 0 of
+ /// the result as a magnus `Value`.
+ ///
+ /// # Errors
+ ///
+ /// Any failure from `quantile_as_series` is reported as an `RbValueError`
+ /// with the message `invalid quantile`; the underlying error is dropped.
+ pub fn quantile(
+     &self,
+     quantile: f64,
+     interpolation: Wrap<QuantileInterpolOptions>,
+ ) -> RbResult<Value> {
+     let series = self.series.borrow();
+     let result = series
+         .quantile_as_series(quantile, interpolation.0)
+         .map_err(|_| RbValueError::new_err("invalid quantile".into()))?;
+     Ok(Wrap(result.get(0)).into())
+ }
+
+ /// Builds a new `RbSeries` around a clone of the wrapped series.
+ pub fn clone(&self) -> Self {
+     let series = self.series.borrow();
+     RbSeries::new(series.clone())
+ }
+
+ /// Calls `Series::zip_with` on this series with `mask` (viewed as a boolean
+ /// array) and `other`, wrapping the outcome in a new `RbSeries`.
+ ///
+ /// # Errors
+ ///
+ /// Fails when `mask.bool()` fails or when `zip_with` itself returns an
+ /// error; both are converted through `RbPolarsErr`.
+ pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult<Self> {
+     // The guard must outlive the boolean view borrowed from it.
+     let mask_series = mask.series.borrow();
+     let mask_ca = mask_series.bool().map_err(RbPolarsErr::from)?;
+
+     let this = self.series.borrow();
+     let that = other.series.borrow();
+     let zipped = this.zip_with(mask_ca, &that).map_err(RbPolarsErr::from)?;
+     Ok(RbSeries::new(zipped))
+ }
+
+ /// Runs `Series::to_dummies` and converts the resulting frame into an
+ /// `RbDataFrame`.
+ ///
+ /// # Errors
+ ///
+ /// A polars error from `to_dummies` is converted through `RbPolarsErr`.
+ pub fn to_dummies(&self) -> RbResult<RbDataFrame> {
+     let series = self.series.borrow();
+     let dummies = series.to_dummies().map_err(RbPolarsErr::from)?;
+     Ok(dummies.into())
+ }
+
+ /// Returns the result of `Series::peak_max` as a new `RbSeries`.
+ pub fn peak_max(&self) -> Self {
+     let peaks = self.series.borrow().peak_max();
+     peaks.into_series().into()
+ }
+
+ /// Returns the result of `Series::peak_min` as a new `RbSeries`.
+ pub fn peak_min(&self) -> Self {
+     let peaks = self.series.borrow().peak_min();
+     peaks.into_series().into()
+ }
+
+ /// Returns the count reported by `Series::n_unique`.
+ ///
+ /// # Errors
+ ///
+ /// A polars error from `n_unique` is converted through `RbPolarsErr`.
+ pub fn n_unique(&self) -> RbResult<usize> {
+     let series = self.series.borrow();
+     let count = series.n_unique().map_err(RbPolarsErr::from)?;
+     Ok(count)
+ }
+
+ /// Applies `Series::floor` and wraps the result in a new `RbSeries`.
+ ///
+ /// # Errors
+ ///
+ /// A polars error from `floor` is converted through `RbPolarsErr`.
+ pub fn floor(&self) -> RbResult<Self> {
+     let series = self.series.borrow();
+     let floored = series.floor().map_err(RbPolarsErr::from)?;
+     Ok(floored.into())
+ }
+
+ /// Calls `shrink_to_fit` on the wrapped series through a mutable borrow.
+ pub fn shrink_to_fit(&self) {
+     let mut series = self.series.borrow_mut();
+     series.shrink_to_fit();
+ }
+
+ /// Returns `Series::dot` of this series with `other`; the `Option` from
+ /// polars is passed back unchanged.
+ pub fn dot(&self, other: &RbSeries) -> Option<f64> {
+     let lhs = self.series.borrow();
+     let rhs = other.series.borrow();
+     lhs.dot(&rhs)
+ }
+
+ /// Forwards to `Series::skew`, passing `bias` through unchanged.
+ ///
+ /// # Errors
+ ///
+ /// A polars error from `skew` is converted through `RbPolarsErr`.
+ pub fn skew(&self, bias: bool) -> RbResult<Option<f64>> {
+     let series = self.series.borrow();
+     let skewness = series.skew(bias).map_err(RbPolarsErr::from)?;
+     Ok(skewness)
+ }
+
+ /// Forwards to `Series::kurtosis`, passing `fisher` and `bias` through
+ /// unchanged.
+ ///
+ /// # Errors
+ ///
+ /// A polars error from `kurtosis` is converted through `RbPolarsErr`.
+ pub fn kurtosis(&self, fisher: bool, bias: bool) -> RbResult<Option<f64>> {
+     let series = self.series.borrow();
+     let kurt = series.kurtosis(fisher, bias).map_err(RbPolarsErr::from)?;
+     Ok(kurt)
+ }
+
+ /// Casts the wrapped series to `dtype` and returns the result as a new
+ /// `RbSeries`.
+ ///
+ /// `strict` selects `Series::strict_cast`; otherwise `Series::cast` is used.
+ ///
+ /// # Errors
+ ///
+ /// A polars error from either cast is converted through `RbPolarsErr`.
+ pub fn cast(&self, dtype: Wrap<DataType>, strict: bool) -> RbResult<Self> {
+     let target = dtype.0;
+     let series = self.series.borrow();
+     let attempt = match strict {
+         true => series.strict_cast(&target),
+         false => series.cast(&target),
+     };
+     let casted = attempt.map_err(RbPolarsErr::from)?;
+     Ok(casted.into())
+ }
+
+ /// Returns the time unit of a `Datetime` or `Duration` series as `"ns"`,
+ /// `"us"` or `"ms"`; every other dtype yields `None`.
+ pub fn time_unit(&self) -> Option<String> {
+     let series = self.series.borrow();
+     let unit = match series.dtype() {
+         DataType::Datetime(tu, _) | DataType::Duration(tu) => tu,
+         _ => return None,
+     };
+     let label = match unit {
+         TimeUnit::Nanoseconds => "ns",
+         TimeUnit::Microseconds => "us",
+         TimeUnit::Milliseconds => "ms",
+     };
+     Some(label.to_string())
+ }
+
// dispatch dynamically in future?
/// Returns the result of `Series::cumsum` as a new `RbSeries`, passing
/// `reverse` through unchanged.
pub fn cumsum(&self, reverse: bool) -> Self {
    let series = self.series.borrow();
    series.cumsum(reverse).into()
}
@@ -466,10 +607,32 @@
/// Returns the result of `Series::cummin` as a new `RbSeries`, passing
/// `reverse` through unchanged.
pub fn cummin(&self, reverse: bool) -> Self {
    let series = self.series.borrow();
    series.cummin(reverse).into()
}
+ /// Returns the result of `Series::cumprod` as a new `RbSeries`, passing
+ /// `reverse` through unchanged.
+ pub fn cumprod(&self, reverse: bool) -> Self {
+     let series = self.series.borrow();
+     series.cumprod(reverse).into()
+ }
+
/// Returns `Series::slice(offset, length)` of the wrapped series as a new
/// `RbSeries`.
pub fn slice(&self, offset: i64, length: usize) -> Self {
    self.series.borrow().slice(offset, length).into()
}
+
+ /// Applies `Series::ceil` and wraps the result in a new `RbSeries`.
+ ///
+ /// # Errors
+ ///
+ /// A polars error from `ceil` is converted through `RbPolarsErr`.
+ pub fn ceil(&self) -> RbResult<Self> {
+     let series = self.series.borrow();
+     let ceiled = series.ceil().map_err(RbPolarsErr::from)?;
+     Ok(ceiled.into())
+ }
+
+ /// Applies `Series::round` with the given `decimals` and wraps the result
+ /// in a new `RbSeries`.
+ ///
+ /// # Errors
+ ///
+ /// A polars error from `round` is converted through `RbPolarsErr`.
+ pub fn round(&self, decimals: u32) -> RbResult<Self> {
+     let series = self.series.borrow();
+     let rounded = series.round(decimals).map_err(RbPolarsErr::from)?;
+     Ok(rounded.into())
+ }
+}
+
+/// Wraps every `Series` in `s` in an `RbSeries`, keeping the input order.
+pub fn to_rbseries_collection(s: Vec<Series>) -> Vec<RbSeries> {
+    s.into_iter().map(RbSeries::new).collect()
}