use either::Either; use magnus::{prelude::*, typed_data::Obj, IntoValue, RArray, Value}; use polars::frame::NullStrategy; use polars::prelude::pivot::{pivot, pivot_stable}; use polars::prelude::*; use crate::conversion::*; use crate::map::dataframe::{ apply_lambda_unknown, apply_lambda_with_bool_out_type, apply_lambda_with_primitive_out_type, apply_lambda_with_utf8_out_type, }; use crate::series::{to_rbseries_collection, to_series_collection}; use crate::{RbDataFrame, RbExpr, RbLazyFrame, RbPolarsErr, RbResult, RbSeries}; impl RbDataFrame { pub fn init(columns: RArray) -> RbResult { let mut cols = Vec::new(); for i in columns.into_iter() { cols.push(<&RbSeries>::try_convert(i)?.series.borrow().clone()); } let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?; Ok(RbDataFrame::new(df)) } pub fn estimated_size(&self) -> usize { self.df.borrow().estimated_size() } pub fn dtype_strings(&self) -> Vec { self.df .borrow() .get_columns() .iter() .map(|s| format!("{}", s.dtype())) .collect() } pub fn add(&self, s: &RbSeries) -> RbResult { let df = (&*self.df.borrow() + &*s.series.borrow()).map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn sub(&self, s: &RbSeries) -> RbResult { let df = (&*self.df.borrow() - &*s.series.borrow()).map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn div(&self, s: &RbSeries) -> RbResult { let df = (&*self.df.borrow() / &*s.series.borrow()).map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn mul(&self, s: &RbSeries) -> RbResult { let df = (&*self.df.borrow() * &*s.series.borrow()).map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn rem(&self, s: &RbSeries) -> RbResult { let df = (&*self.df.borrow() % &*s.series.borrow()).map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn add_df(&self, s: &Self) -> RbResult { let df = (&*self.df.borrow() + &*s.df.borrow()).map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn sub_df(&self, s: &Self) -> RbResult { let df = (&*self.df.borrow() - &*s.df.borrow()).map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn div_df(&self, s: &Self) -> RbResult { let df = (&*self.df.borrow() / &*s.df.borrow()).map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn mul_df(&self, s: &Self) -> RbResult { let df = (&*self.df.borrow() * &*s.df.borrow()).map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn rem_df(&self, s: &Self) -> RbResult { let df = (&*self.df.borrow() % &*s.df.borrow()).map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn sample_n( &self, n: &RbSeries, with_replacement: bool, shuffle: bool, seed: Option, ) -> RbResult { let df = self .df .borrow() .sample_n(&n.series.borrow(), with_replacement, shuffle, seed) .map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn sample_frac( &self, frac: &RbSeries, with_replacement: bool, shuffle: bool, seed: Option, ) -> RbResult { let df = self .df .borrow() .sample_frac(&frac.series.borrow(), with_replacement, shuffle, seed) .map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn rechunk(&self) -> Self { let mut df = self.df.borrow_mut().clone(); df.as_single_chunk_par(); df.into() } pub fn as_str(&self) -> String { format!("{}", self.df.borrow()) } pub fn get_columns(&self) -> RArray { let cols = self.df.borrow().get_columns().to_vec(); to_rbseries_collection(cols) } pub fn columns(&self) -> Vec { self.df .borrow() .get_column_names() .iter() .map(|v| v.to_string()) .collect() } pub fn set_column_names(&self, names: Vec) -> RbResult<()> { self.df .borrow_mut() .set_column_names(&names) .map_err(RbPolarsErr::from)?; Ok(()) } pub fn dtypes(&self) -> RArray { RArray::from_iter( self.df .borrow() .iter() .map(|s| Wrap(s.dtype().clone()).into_value()), ) } pub fn n_chunks(&self) -> usize { self.df.borrow().n_chunks() } pub fn shape(&self) -> (usize, usize) { self.df.borrow().shape() } pub fn height(&self) -> usize { self.df.borrow().height() } pub fn width(&self) -> usize { self.df.borrow().width() } pub fn hstack(&self, columns: RArray) -> RbResult { let columns = to_series_collection(columns)?; let df = self .df .borrow() .hstack(&columns) .map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn hstack_mut(&self, columns: RArray) -> RbResult<()> { let columns = to_series_collection(columns)?; self.df .borrow_mut() .hstack_mut(&columns) .map_err(RbPolarsErr::from)?; Ok(()) } pub fn vstack(&self, df: &RbDataFrame) -> RbResult { let df = self .df .borrow() .vstack(&df.df.borrow()) .map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn vstack_mut(&self, df: &RbDataFrame) -> RbResult<()> { self.df .borrow_mut() .vstack_mut(&df.df.borrow()) .map_err(RbPolarsErr::from)?; Ok(()) } pub fn extend(&self, df: &RbDataFrame) -> RbResult<()> { self.df .borrow_mut() .extend(&df.df.borrow()) .map_err(RbPolarsErr::from)?; Ok(()) } pub fn drop_in_place(&self, name: String) -> RbResult { let s = self .df .borrow_mut() .drop_in_place(&name) .map_err(RbPolarsErr::from)?; Ok(RbSeries::new(s)) } pub fn select_at_idx(&self, idx: usize) -> Option { self.df .borrow() .select_at_idx(idx) .map(|s| RbSeries::new(s.clone())) } pub fn get_column_index(&self, name: String) -> Option { self.df.borrow().get_column_index(&name) } pub fn get_column(&self, name: String) -> RbResult { self.df .borrow() .column(&name) .map(|s| RbSeries::new(s.clone())) .map_err(RbPolarsErr::from) } pub fn select(&self, selection: Vec) -> RbResult { let df = self .df .borrow() .select(selection) .map_err(RbPolarsErr::from)?; Ok(RbDataFrame::new(df)) } pub fn gather(&self, indices: Vec) -> RbResult { let indices = IdxCa::from_vec("", indices); let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?; Ok(RbDataFrame::new(df)) } pub fn take_with_series(&self, indices: &RbSeries) -> RbResult { let binding = indices.series.borrow(); let idx = binding.idx().map_err(RbPolarsErr::from)?; let df = self.df.borrow().take(idx).map_err(RbPolarsErr::from)?; Ok(RbDataFrame::new(df)) } pub fn replace(&self, column: String, new_col: &RbSeries) -> RbResult<()> { self.df .borrow_mut() .replace(&column, new_col.series.borrow().clone()) .map_err(RbPolarsErr::from)?; Ok(()) } pub fn replace_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> { self.df .borrow_mut() .replace_column(index, new_col.series.borrow().clone()) .map_err(RbPolarsErr::from)?; Ok(()) } pub fn insert_column(&self, index: usize, new_col: &RbSeries) -> RbResult<()> { self.df .borrow_mut() .insert_column(index, new_col.series.borrow().clone()) .map_err(RbPolarsErr::from)?; Ok(()) } pub fn slice(&self, offset: usize, length: Option) -> Self { let df = self.df.borrow().slice( offset as i64, length.unwrap_or_else(|| self.df.borrow().height()), ); df.into() } pub fn head(&self, length: Option) -> Self { self.df.borrow().head(length).into() } pub fn tail(&self, length: Option) -> Self { self.df.borrow().tail(length).into() } pub fn is_unique(&self) -> RbResult { let mask = self.df.borrow().is_unique().map_err(RbPolarsErr::from)?; Ok(mask.into_series().into()) } pub fn is_duplicated(&self) -> RbResult { let mask = self .df .borrow() .is_duplicated() .map_err(RbPolarsErr::from)?; Ok(mask.into_series().into()) } pub fn equals(&self, other: &RbDataFrame, null_equal: bool) -> bool { if null_equal { self.df.borrow().equals_missing(&other.df.borrow()) } else { self.df.borrow().equals(&other.df.borrow()) } } pub fn with_row_index(&self, name: String, offset: Option) -> RbResult { let df = self .df .borrow() .with_row_index(&name, offset) .map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn clone(&self) -> Self { RbDataFrame::new(self.df.borrow().clone()) } pub fn unpivot( &self, on: Vec, index: Vec, value_name: Option, variable_name: Option, ) -> RbResult { let args = UnpivotArgsIR { on: strings_to_smartstrings(on), index: strings_to_smartstrings(index), value_name: value_name.map(|s| s.into()), variable_name: variable_name.map(|s| s.into()), }; let df = self.df.borrow().unpivot2(args).map_err(RbPolarsErr::from)?; Ok(RbDataFrame::new(df)) } #[allow(clippy::too_many_arguments)] pub fn pivot_expr( &self, on: Vec, index: Option>, values: Option>, maintain_order: bool, sort_columns: bool, aggregate_expr: Option<&RbExpr>, separator: Option, ) -> RbResult { let fun = if maintain_order { pivot_stable } else { pivot }; let agg_expr = aggregate_expr.map(|aggregate_expr| aggregate_expr.inner.clone()); let df = fun( &self.df.borrow(), on, index, values, sort_columns, agg_expr, separator.as_deref(), ) .map_err(RbPolarsErr::from)?; Ok(RbDataFrame::new(df)) } pub fn partition_by( &self, by: Vec, maintain_order: bool, include_key: bool, ) -> RbResult { let out = if maintain_order { self.df.borrow().partition_by_stable(by, include_key) } else { self.df.borrow().partition_by(by, include_key) } .map_err(RbPolarsErr::from)?; Ok(RArray::from_iter(out.into_iter().map(RbDataFrame::new))) } pub fn lazy(&self) -> RbLazyFrame { self.df.borrow().clone().lazy().into() } pub fn max_horizontal(&self) -> RbResult> { let s = self .df .borrow() .max_horizontal() .map_err(RbPolarsErr::from)?; Ok(s.map(|s| s.into())) } pub fn min_horizontal(&self) -> RbResult> { let s = self .df .borrow() .min_horizontal() .map_err(RbPolarsErr::from)?; Ok(s.map(|s| s.into())) } pub fn sum_horizontal(&self, ignore_nulls: bool) -> RbResult> { let null_strategy = if ignore_nulls { NullStrategy::Ignore } else { NullStrategy::Propagate }; let s = self .df .borrow() .sum_horizontal(null_strategy) .map_err(RbPolarsErr::from)?; Ok(s.map(|s| s.into())) } pub fn mean_horizontal(&self, ignore_nulls: bool) -> RbResult> { let null_strategy = if ignore_nulls { NullStrategy::Ignore } else { NullStrategy::Propagate }; let s = self .df .borrow() .mean_horizontal(null_strategy) .map_err(RbPolarsErr::from)?; Ok(s.map(|s| s.into())) } pub fn to_dummies( &self, columns: Option>, separator: Option, drop_first: bool, ) -> RbResult { let df = match columns { Some(cols) => self.df.borrow().columns_to_dummies( cols.iter().map(|x| x as &str).collect(), separator.as_deref(), drop_first, ), None => self .df .borrow() .to_dummies(separator.as_deref(), drop_first), } .map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn null_count(&self) -> Self { let df = self.df.borrow().null_count(); df.into() } pub fn map_rows( &self, lambda: Value, output_type: Option>, inference_size: usize, ) -> RbResult<(Value, bool)> { let df = &self.df.borrow(); let output_type = output_type.map(|dt| dt.0); let out = match output_type { Some(DataType::Int32) => { apply_lambda_with_primitive_out_type::(df, lambda, 0, None).into_series() } Some(DataType::Int64) => { apply_lambda_with_primitive_out_type::(df, lambda, 0, None).into_series() } Some(DataType::UInt32) => { apply_lambda_with_primitive_out_type::(df, lambda, 0, None) .into_series() } Some(DataType::UInt64) => { apply_lambda_with_primitive_out_type::(df, lambda, 0, None) .into_series() } Some(DataType::Float32) => { apply_lambda_with_primitive_out_type::(df, lambda, 0, None) .into_series() } Some(DataType::Float64) => { apply_lambda_with_primitive_out_type::(df, lambda, 0, None) .into_series() } Some(DataType::Boolean) => { apply_lambda_with_bool_out_type(df, lambda, 0, None).into_series() } Some(DataType::Date) => { apply_lambda_with_primitive_out_type::(df, lambda, 0, None) .into_date() .into_series() } Some(DataType::Datetime(tu, tz)) => { apply_lambda_with_primitive_out_type::(df, lambda, 0, None) .into_datetime(tu, tz) .into_series() } Some(DataType::String) => { apply_lambda_with_utf8_out_type(df, lambda, 0, None).into_series() } _ => return apply_lambda_unknown(df, lambda, inference_size), }; Ok((Obj::wrap(RbSeries::from(out)).as_value(), false)) } pub fn shrink_to_fit(&self) { self.df.borrow_mut().shrink_to_fit(); } pub fn hash_rows(&self, k0: u64, k1: u64, k2: u64, k3: u64) -> RbResult { let hb = ahash::RandomState::with_seeds(k0, k1, k2, k3); let hash = self .df .borrow_mut() .hash_rows(Some(hb)) .map_err(RbPolarsErr::from)?; Ok(hash.into_series().into()) } pub fn transpose(&self, keep_names_as: Option, column_names: Value) -> RbResult { let new_col_names = if let Ok(name) = >::try_convert(column_names) { Some(Either::Right(name)) } else if let Ok(name) = String::try_convert(column_names) { Some(Either::Left(name)) } else { None }; Ok(self .df .borrow_mut() .transpose(keep_names_as.as_deref(), new_col_names) .map_err(RbPolarsErr::from)? .into()) } pub fn upsample( &self, by: Vec, index_column: String, every: String, stable: bool, ) -> RbResult { let out = if stable { self.df .borrow() .upsample_stable(by, &index_column, Duration::parse(&every)) } else { self.df .borrow() .upsample(by, &index_column, Duration::parse(&every)) }; let out = out.map_err(RbPolarsErr::from)?; Ok(out.into()) } pub fn to_struct(&self, name: String) -> RbSeries { let s = self.df.borrow().clone().into_struct(&name); s.into_series().into() } pub fn unnest(&self, names: Vec) -> RbResult { let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?; Ok(df.into()) } pub fn clear(&self) -> Self { self.df.borrow().clear().into() } }