use magnus::exception::arg_error; use magnus::{Error, RArray, Value}; use polars::prelude::*; use polars::series::IsSorted; use std::cell::RefCell; use crate::conversion::*; use crate::list_construction::rb_seq_to_list; use crate::set::set_at_idx; use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError}; #[magnus::wrap(class = "Polars::RbSeries")] pub struct RbSeries { pub series: RefCell, } impl From for RbSeries { fn from(series: Series) -> Self { RbSeries::new(series) } } impl RbSeries { pub fn new(series: Series) -> Self { RbSeries { series: RefCell::new(series), } } pub fn is_sorted_flag(&self) -> bool { matches!(self.series.borrow().is_sorted(), IsSorted::Ascending) } pub fn is_sorted_reverse_flag(&self) -> bool { matches!(self.series.borrow().is_sorted(), IsSorted::Descending) } pub fn new_opt_bool(name: String, obj: RArray, strict: bool) -> RbResult { let len = obj.len(); let mut builder = BooleanChunkedBuilder::new(&name, len); unsafe { for item in obj.as_slice().iter() { if item.is_nil() { builder.append_null() } else { match item.try_convert::() { Ok(val) => builder.append_value(val), Err(e) => { if strict { return Err(e); } builder.append_null() } } } } } let ca = builder.finish(); let s = ca.into_series(); Ok(RbSeries::new(s)) } } fn new_primitive(name: &str, obj: RArray, strict: bool) -> RbResult where T: PolarsNumericType, ChunkedArray: IntoSeries, T::Native: magnus::TryConvert, { let len = obj.len(); let mut builder = PrimitiveChunkedBuilder::::new(name, len); unsafe { for item in obj.as_slice().iter() { if item.is_nil() { builder.append_null() } else { match item.try_convert::() { Ok(val) => builder.append_value(val), Err(e) => { if strict { return Err(e); } builder.append_null() } } } } } let ca = builder.finish(); let s = ca.into_series(); Ok(RbSeries::new(s)) } // Init with lists that can contain Nones macro_rules! init_method_opt { ($name:ident, $type:ty, $native: ty) => { impl RbSeries { pub fn $name(name: String, obj: RArray, strict: bool) -> RbResult { new_primitive::<$type>(&name, obj, strict) } } }; } init_method_opt!(new_opt_u8, UInt8Type, u8); init_method_opt!(new_opt_u16, UInt16Type, u16); init_method_opt!(new_opt_u32, UInt32Type, u32); init_method_opt!(new_opt_u64, UInt64Type, u64); init_method_opt!(new_opt_i8, Int8Type, i8); init_method_opt!(new_opt_i16, Int16Type, i16); init_method_opt!(new_opt_i32, Int32Type, i32); init_method_opt!(new_opt_i64, Int64Type, i64); init_method_opt!(new_opt_f32, Float32Type, f32); init_method_opt!(new_opt_f64, Float64Type, f64); impl RbSeries { pub fn new_str(name: String, val: Wrap, _strict: bool) -> Self { let mut s = val.0.into_series(); s.rename(&name); RbSeries::new(s) } pub fn new_object(name: String, val: RArray, _strict: bool) -> RbResult { let val = val .each() .map(|v| v.map(ObjectValue::from)) .collect::>>()?; let s = ObjectChunked::::new_from_vec(&name, val).into_series(); Ok(s.into()) } pub fn new_list(name: String, seq: Value, dtype: Wrap) -> RbResult { rb_seq_to_list(&name, seq, &dtype.0).map(|s| s.into()) } pub fn estimated_size(&self) -> usize { self.series.borrow().estimated_size() } pub fn get_fmt(&self, index: usize, str_lengths: usize) -> String { let val = format!("{}", self.series.borrow().get(index)); if let DataType::Utf8 | DataType::Categorical(_) = self.series.borrow().dtype() { let v_trunc = &val[..val .char_indices() .take(str_lengths) .last() .map(|(i, c)| i + c.len_utf8()) .unwrap_or(0)]; if val == v_trunc { val } else { format!("{}...", v_trunc) } } else { val } } pub fn rechunk(&self, in_place: bool) -> Option { let series = self.series.borrow_mut().rechunk(); if in_place { *self.series.borrow_mut() = series; None } else { Some(series.into()) } } pub fn get_idx(&self, idx: usize) -> Value { Wrap(self.series.borrow().get(idx)).into() } pub fn bitand(&self, other: &RbSeries) -> RbResult { let out = self .series .borrow() .bitand(&other.series.borrow()) .map_err(RbPolarsErr::from)?; Ok(out.into()) } pub fn bitor(&self, other: &RbSeries) -> RbResult { let out = self .series .borrow() .bitor(&other.series.borrow()) .map_err(RbPolarsErr::from)?; Ok(out.into()) } pub fn bitxor(&self, other: &RbSeries) -> RbResult { let out = self .series .borrow() .bitxor(&other.series.borrow()) .map_err(RbPolarsErr::from)?; Ok(out.into()) } pub fn chunk_lengths(&self) -> Vec { self.series.borrow().chunk_lengths().collect() } pub fn name(&self) -> String { self.series.borrow().name().into() } pub fn rename(&self, name: String) { self.series.borrow_mut().rename(&name); } pub fn dtype(&self) -> Value { Wrap(self.series.borrow().dtype().clone()).into() } pub fn inner_dtype(&self) -> Option { self.series .borrow() .dtype() .inner_dtype() .map(|dt| Wrap(dt.clone()).into()) } pub fn set_sorted(&self, reverse: bool) -> Self { let mut out = self.series.borrow().clone(); if reverse { out.set_sorted(IsSorted::Descending); } else { out.set_sorted(IsSorted::Ascending) } out.into() } pub fn mean(&self) -> Option { match self.series.borrow().dtype() { DataType::Boolean => { let s = self.series.borrow().cast(&DataType::UInt8).unwrap(); s.mean() } _ => self.series.borrow().mean(), } } pub fn max(&self) -> Value { Wrap(self.series.borrow().max_as_series().get(0)).into() } pub fn min(&self) -> Value { Wrap(self.series.borrow().min_as_series().get(0)).into() } pub fn sum(&self) -> Value { Wrap(self.series.borrow().sum_as_series().get(0)).into() } pub fn n_chunks(&self) -> usize { self.series.borrow().n_chunks() } pub fn append(&self, other: &RbSeries) -> RbResult<()> { let mut binding = self.series.borrow_mut(); let res = binding.append(&other.series.borrow()); if let Err(e) = res { Err(Error::runtime_error(e.to_string())) } else { Ok(()) } } pub fn extend(&self, other: &RbSeries) -> RbResult<()> { self.series .borrow_mut() .extend(&other.series.borrow()) .map_err(RbPolarsErr::from)?; Ok(()) } pub fn new_from_index(&self, index: usize, length: usize) -> RbResult { if index >= self.series.borrow().len() { Err(Error::new(arg_error(), "index is out of bounds")) } else { Ok(self.series.borrow().new_from_index(index, length).into()) } } pub fn filter(&self, filter: &RbSeries) -> RbResult { let filter_series = &filter.series.borrow(); if let Ok(ca) = filter_series.bool() { let series = self.series.borrow().filter(ca).unwrap(); Ok(series.into()) } else { Err(Error::runtime_error("Expected a boolean mask".to_string())) } } pub fn add(&self, other: &RbSeries) -> Self { (&*self.series.borrow() + &*other.series.borrow()).into() } pub fn sub(&self, other: &RbSeries) -> Self { (&*self.series.borrow() - &*other.series.borrow()).into() } pub fn mul(&self, other: &RbSeries) -> Self { (&*self.series.borrow() * &*other.series.borrow()).into() } pub fn div(&self, other: &RbSeries) -> Self { (&*self.series.borrow() / &*other.series.borrow()).into() } pub fn rem(&self, other: &RbSeries) -> Self { (&*self.series.borrow() % &*other.series.borrow()).into() } pub fn sort(&self, reverse: bool) -> Self { (self.series.borrow_mut().sort(reverse)).into() } pub fn value_counts(&self, sorted: bool) -> RbResult { let df = self .series .borrow() .value_counts(true, sorted) .map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn arg_min(&self) -> Option { self.series.borrow().arg_min() } pub fn arg_max(&self) -> Option { self.series.borrow().arg_max() } pub fn take_with_series(&self, indices: &RbSeries) -> RbResult { let binding = indices.series.borrow(); let idx = binding.idx().map_err(RbPolarsErr::from)?; let take = self.series.borrow().take(idx).map_err(RbPolarsErr::from)?; Ok(RbSeries::new(take)) } pub fn null_count(&self) -> RbResult { Ok(self.series.borrow().null_count()) } pub fn has_validity(&self) -> bool { self.series.borrow().has_validity() } pub fn sample_n( &self, n: usize, with_replacement: bool, shuffle: bool, seed: Option, ) -> RbResult { let s = self .series .borrow() .sample_n(n, with_replacement, shuffle, seed) .map_err(RbPolarsErr::from)?; Ok(s.into()) } pub fn sample_frac( &self, frac: f64, with_replacement: bool, shuffle: bool, seed: Option, ) -> RbResult { let s = self .series .borrow() .sample_frac(frac, with_replacement, shuffle, seed) .map_err(RbPolarsErr::from)?; Ok(s.into()) } pub fn series_equal(&self, other: &RbSeries, null_equal: bool, strict: bool) -> bool { if strict { self.series.borrow().eq(&other.series.borrow()) } else if null_equal { self.series .borrow() .series_equal_missing(&other.series.borrow()) } else { self.series.borrow().series_equal(&other.series.borrow()) } } pub fn eq(&self, rhs: &RbSeries) -> RbResult { let s = self .series .borrow() .equal(&*rhs.series.borrow()) .map_err(RbPolarsErr::from)?; Ok(Self::new(s.into_series())) } pub fn neq(&self, rhs: &RbSeries) -> RbResult { let s = self .series .borrow() .not_equal(&*rhs.series.borrow()) .map_err(RbPolarsErr::from)?; Ok(Self::new(s.into_series())) } pub fn gt(&self, rhs: &RbSeries) -> RbResult { let s = self .series .borrow() .gt(&*rhs.series.borrow()) .map_err(RbPolarsErr::from)?; Ok(Self::new(s.into_series())) } pub fn gt_eq(&self, rhs: &RbSeries) -> RbResult { let s = self .series .borrow() .gt_eq(&*rhs.series.borrow()) .map_err(RbPolarsErr::from)?; Ok(Self::new(s.into_series())) } pub fn lt(&self, rhs: &RbSeries) -> RbResult { let s = self .series .borrow() .lt(&*rhs.series.borrow()) .map_err(RbPolarsErr::from)?; Ok(Self::new(s.into_series())) } pub fn lt_eq(&self, rhs: &RbSeries) -> RbResult { let s = self .series .borrow() .lt_eq(&*rhs.series.borrow()) .map_err(RbPolarsErr::from)?; Ok(Self::new(s.into_series())) } pub fn not(&self) -> RbResult { let binding = self.series.borrow(); let bool = binding.bool().map_err(RbPolarsErr::from)?; Ok((!bool).into_series().into()) } pub fn to_s(&self) -> String { format!("{}", self.series.borrow()) } pub fn len(&self) -> usize { self.series.borrow().len() } pub fn to_a(&self) -> RArray { let series = self.series.borrow(); if let Ok(s) = series.f32() { s.into_iter().collect() } else if let Ok(s) = series.f64() { s.into_iter().collect() } else if let Ok(s) = series.i8() { s.into_iter().collect() } else if let Ok(s) = series.i16() { s.into_iter().collect() } else if let Ok(s) = series.i32() { s.into_iter().collect() } else if let Ok(s) = series.i64() { s.into_iter().collect() } else if let Ok(s) = series.u8() { s.into_iter().collect() } else if let Ok(s) = series.u16() { s.into_iter().collect() } else if let Ok(s) = series.u32() { s.into_iter().collect() } else if let Ok(s) = series.u64() { s.into_iter().collect() } else if let Ok(s) = series.bool() { s.into_iter().collect() } else if let Ok(s) = series.utf8() { s.into_iter().collect() } else if let Ok(_s) = series.date() { let a = RArray::with_capacity(series.len()); for v in series.iter() { a.push::(Wrap(v).into()).unwrap(); } a } else { unimplemented!(); } } pub fn median(&self) -> Option { match self.series.borrow().dtype() { DataType::Boolean => { let s = self.series.borrow().cast(&DataType::UInt8).unwrap(); s.median() } _ => self.series.borrow().median(), } } pub fn quantile( &self, quantile: f64, interpolation: Wrap, ) -> RbResult { Ok(Wrap( self.series .borrow() .quantile_as_series(quantile, interpolation.0) .map_err(|_| RbValueError::new_err("invalid quantile".into()))? .get(0), ) .into()) } pub fn clone(&self) -> Self { RbSeries::new(self.series.borrow().clone()) } pub fn zip_with(&self, mask: &RbSeries, other: &RbSeries) -> RbResult { let binding = mask.series.borrow(); let mask = binding.bool().map_err(RbPolarsErr::from)?; let s = self .series .borrow() .zip_with(mask, &other.series.borrow()) .map_err(RbPolarsErr::from)?; Ok(RbSeries::new(s)) } pub fn to_dummies(&self) -> RbResult { let df = self .series .borrow() .to_dummies() .map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn peak_max(&self) -> Self { self.series.borrow().peak_max().into_series().into() } pub fn peak_min(&self) -> Self { self.series.borrow().peak_min().into_series().into() } pub fn n_unique(&self) -> RbResult { let n = self.series.borrow().n_unique().map_err(RbPolarsErr::from)?; Ok(n) } pub fn floor(&self) -> RbResult { let s = self.series.borrow().floor().map_err(RbPolarsErr::from)?; Ok(s.into()) } pub fn shrink_to_fit(&self) { self.series.borrow_mut().shrink_to_fit(); } pub fn dot(&self, other: &RbSeries) -> Option { self.series.borrow().dot(&other.series.borrow()) } pub fn skew(&self, bias: bool) -> RbResult> { let out = self.series.borrow().skew(bias).map_err(RbPolarsErr::from)?; Ok(out) } pub fn kurtosis(&self, fisher: bool, bias: bool) -> RbResult> { let out = self .series .borrow() .kurtosis(fisher, bias) .map_err(RbPolarsErr::from)?; Ok(out) } pub fn cast(&self, dtype: Wrap, strict: bool) -> RbResult { let dtype = dtype.0; let out = if strict { self.series.borrow().strict_cast(&dtype) } else { self.series.borrow().cast(&dtype) }; let out = out.map_err(RbPolarsErr::from)?; Ok(out.into()) } pub fn time_unit(&self) -> Option { if let DataType::Datetime(tu, _) | DataType::Duration(tu) = self.series.borrow().dtype() { Some( match tu { TimeUnit::Nanoseconds => "ns", TimeUnit::Microseconds => "us", TimeUnit::Milliseconds => "ms", } .to_string(), ) } else { None } } pub fn set_at_idx(&self, idx: &RbSeries, values: &RbSeries) -> RbResult<()> { let mut s = self.series.borrow_mut(); match set_at_idx(s.clone(), &idx.series.borrow(), &values.series.borrow()) { Ok(out) => { *s = out; Ok(()) } Err(e) => Err(RbPolarsErr::from(e)), } } } macro_rules! impl_eq_num { ($name:ident, $type:ty) => { impl RbSeries { pub fn $name(&self, rhs: $type) -> RbResult { let s = self.series.borrow().equal(rhs).map_err(RbPolarsErr::from)?; Ok(RbSeries::new(s.into_series())) } } }; } impl_eq_num!(eq_u8, u8); impl_eq_num!(eq_u16, u16); impl_eq_num!(eq_u32, u32); impl_eq_num!(eq_u64, u64); impl_eq_num!(eq_i8, i8); impl_eq_num!(eq_i16, i16); impl_eq_num!(eq_i32, i32); impl_eq_num!(eq_i64, i64); impl_eq_num!(eq_f32, f32); impl_eq_num!(eq_f64, f64); // impl_eq_num!(eq_str, &str); macro_rules! impl_neq_num { ($name:ident, $type:ty) => { impl RbSeries { pub fn $name(&self, rhs: $type) -> RbResult { let s = self .series .borrow() .not_equal(rhs) .map_err(RbPolarsErr::from)?; Ok(RbSeries::new(s.into_series())) } } }; } impl_neq_num!(neq_u8, u8); impl_neq_num!(neq_u16, u16); impl_neq_num!(neq_u32, u32); impl_neq_num!(neq_u64, u64); impl_neq_num!(neq_i8, i8); impl_neq_num!(neq_i16, i16); impl_neq_num!(neq_i32, i32); impl_neq_num!(neq_i64, i64); impl_neq_num!(neq_f32, f32); impl_neq_num!(neq_f64, f64); // impl_neq_num!(neq_str, &str); macro_rules! impl_gt_num { ($name:ident, $type:ty) => { impl RbSeries { pub fn $name(&self, rhs: $type) -> RbResult { let s = self.series.borrow().gt(rhs).map_err(RbPolarsErr::from)?; Ok(RbSeries::new(s.into_series())) } } }; } impl_gt_num!(gt_u8, u8); impl_gt_num!(gt_u16, u16); impl_gt_num!(gt_u32, u32); impl_gt_num!(gt_u64, u64); impl_gt_num!(gt_i8, i8); impl_gt_num!(gt_i16, i16); impl_gt_num!(gt_i32, i32); impl_gt_num!(gt_i64, i64); impl_gt_num!(gt_f32, f32); impl_gt_num!(gt_f64, f64); // impl_gt_num!(gt_str, &str); macro_rules! impl_gt_eq_num { ($name:ident, $type:ty) => { impl RbSeries { pub fn $name(&self, rhs: $type) -> RbResult { let s = self.series.borrow().gt_eq(rhs).map_err(RbPolarsErr::from)?; Ok(RbSeries::new(s.into_series())) } } }; } impl_gt_eq_num!(gt_eq_u8, u8); impl_gt_eq_num!(gt_eq_u16, u16); impl_gt_eq_num!(gt_eq_u32, u32); impl_gt_eq_num!(gt_eq_u64, u64); impl_gt_eq_num!(gt_eq_i8, i8); impl_gt_eq_num!(gt_eq_i16, i16); impl_gt_eq_num!(gt_eq_i32, i32); impl_gt_eq_num!(gt_eq_i64, i64); impl_gt_eq_num!(gt_eq_f32, f32); impl_gt_eq_num!(gt_eq_f64, f64); // impl_gt_eq_num!(gt_eq_str, &str); macro_rules! impl_lt_num { ($name:ident, $type:ty) => { impl RbSeries { pub fn $name(&self, rhs: $type) -> RbResult { let s = self.series.borrow().lt(rhs).map_err(RbPolarsErr::from)?; Ok(RbSeries::new(s.into_series())) } } }; } impl_lt_num!(lt_u8, u8); impl_lt_num!(lt_u16, u16); impl_lt_num!(lt_u32, u32); impl_lt_num!(lt_u64, u64); impl_lt_num!(lt_i8, i8); impl_lt_num!(lt_i16, i16); impl_lt_num!(lt_i32, i32); impl_lt_num!(lt_i64, i64); impl_lt_num!(lt_f32, f32); impl_lt_num!(lt_f64, f64); // impl_lt_num!(lt_str, &str); macro_rules! impl_lt_eq_num { ($name:ident, $type:ty) => { impl RbSeries { pub fn $name(&self, rhs: $type) -> RbResult { let s = self.series.borrow().lt_eq(rhs).map_err(RbPolarsErr::from)?; Ok(RbSeries::new(s.into_series())) } } }; } impl_lt_eq_num!(lt_eq_u8, u8); impl_lt_eq_num!(lt_eq_u16, u16); impl_lt_eq_num!(lt_eq_u32, u32); impl_lt_eq_num!(lt_eq_u64, u64); impl_lt_eq_num!(lt_eq_i8, i8); impl_lt_eq_num!(lt_eq_i16, i16); impl_lt_eq_num!(lt_eq_i32, i32); impl_lt_eq_num!(lt_eq_i64, i64); impl_lt_eq_num!(lt_eq_f32, f32); impl_lt_eq_num!(lt_eq_f64, f64); // impl_lt_eq_num!(lt_eq_str, &str); pub fn to_series_collection(rs: RArray) -> RbResult> { let mut series = Vec::new(); for item in rs.each() { series.push(item?.try_convert::<&RbSeries>()?.series.borrow().clone()); } Ok(series) } pub fn to_rbseries_collection(s: Vec) -> Vec { s.into_iter().map(RbSeries::new).collect() } impl RbSeries { pub fn new_opt_date(name: String, values: RArray, _strict: Option) -> RbResult { let len = values.len(); let mut builder = PrimitiveChunkedBuilder::::new(&name, len); for item in values.each() { let v = item?; if v.is_nil() { builder.append_null(); } else { // convert to DateTime for UTC let v: Value = v.funcall("to_datetime", ())?; let v: Value = v.funcall("to_time", ())?; let v: Value = v.funcall("to_i", ())?; // TODO use strict builder.append_value(v.try_convert::()? / 86400); } } let ca: ChunkedArray = builder.finish(); Ok(ca.into_date().into_series().into()) } pub fn new_opt_datetime(name: String, values: RArray, _strict: Option) -> RbResult { let len = values.len(); let mut builder = PrimitiveChunkedBuilder::::new(&name, len); for item in values.each() { let v = item?; if v.is_nil() { builder.append_null(); } else { let sec: i64 = v.funcall("to_i", ())?; let nsec: i64 = v.funcall("nsec", ())?; // TODO use strict builder.append_value(sec * 1_000_000_000 + nsec); } } let ca: ChunkedArray = builder.finish(); Ok(ca .into_datetime(TimeUnit::Nanoseconds, None) .into_series() .into()) } }