ext/polars/src/dataframe.rs in polars-df-0.1.0 vs ext/polars/src/dataframe.rs in polars-df-0.1.1

- old
+ new

@@ -5,12 +5,13 @@
 use std::fs::File;
 use std::io::{BufReader, BufWriter, Cursor};
 use std::ops::Deref;
 use std::path::PathBuf;

-use crate::conversion::parse_parquet_compression;
+use crate::conversion::*;
 use crate::file::{get_file_like, get_mmap_bytes_reader};
+use crate::series::to_rbseries_collection;
 use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};

 #[magnus::wrap(class = "Polars::RbDataFrame")]
 pub struct RbDataFrame {
     pub df: RefCell<DataFrame>,
@@ -36,10 +37,14 @@
         }
         let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
         Ok(RbDataFrame::new(df))
     }

+    pub fn estimated_size(&self) -> usize {
+        self.df.borrow().estimated_size()
+    }
+
     pub fn read_csv(rb_f: Value, has_header: bool) -> RbResult<Self> {
         let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
         let df = CsvReader::new(mmap_bytes_r)
             .has_header(has_header)
             .finish()
@@ -211,27 +216,45 @@

     pub fn to_s(&self) -> String {
         format!("{}", self.df.borrow())
     }

+    pub fn get_columns(&self) -> Vec<RbSeries> {
+        let cols = self.df.borrow().get_columns().clone();
+        to_rbseries_collection(cols)
+    }
+
     pub fn columns(&self) -> Vec<String> {
         self.df
             .borrow()
             .get_column_names()
             .iter()
             .map(|v| v.to_string())
             .collect()
     }

+    pub fn set_column_names(&self, names: Vec<String>) -> RbResult<()> {
+        self.df
+            .borrow_mut()
+            .set_column_names(&names)
+            .map_err(RbPolarsErr::from)?;
+        Ok(())
+    }
+
     pub fn dtypes(&self) -> Vec<String> {
         self.df
             .borrow()
             .iter()
             .map(|s| s.dtype().to_string())
             .collect()
     }

+    pub fn n_chunks(&self) -> RbResult<usize> {
+        let n = self.df.borrow().n_chunks().map_err(RbPolarsErr::from)?;
+        Ok(n)
+    }
+
     pub fn shape(&self) -> (usize, usize) {
         self.df.borrow().shape()
     }

     pub fn height(&self) -> usize {
@@ -256,10 +279,32 @@
             .column(&name)
             .map(|v| v.clone().into())
             .map_err(RbPolarsErr::from)
     }

+    pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
+        let df = self
+            .df
+            .borrow()
+            .select(selection)
+            .map_err(RbPolarsErr::from)?;
+        Ok(RbDataFrame::new(df))
+    }
+
+    pub fn take(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
+        let indices = IdxCa::from_vec("", indices);
+        let df = self.df.borrow().take(&indices).map_err(RbPolarsErr::from)?;
+        Ok(RbDataFrame::new(df))
+    }
+
+    pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
+        let binding = indices.series.borrow();
+        let idx = binding.idx().map_err(RbPolarsErr::from)?;
+        let df = self.df.borrow().take(idx).map_err(RbPolarsErr::from)?;
+        Ok(RbDataFrame::new(df))
+    }
+
     pub fn sort(&self, by_column: String, reverse: bool, nulls_last: bool) -> RbResult<Self> {
         let df = self
             .df
             .borrow()
             .sort_with_options(
@@ -271,34 +316,266 @@
             )
             .map_err(RbPolarsErr::from)?;
         Ok(RbDataFrame::new(df))
     }

+    pub fn replace(&self, column: String, new_col: &RbSeries) -> RbResult<()> {
+        self.df
+            .borrow_mut()
+            .replace(&column, new_col.series.borrow().clone())
+            .map_err(RbPolarsErr::from)?;
+        Ok(())
+    }
+
+    pub fn replace_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
+        self.df
+            .borrow_mut()
+            .replace_at_idx(index, new_col.series.borrow().clone())
+            .map_err(RbPolarsErr::from)?;
+        Ok(())
+    }
+
+    pub fn insert_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
+        self.df
+            .borrow_mut()
+            .insert_at_idx(index, new_col.series.borrow().clone())
+            .map_err(RbPolarsErr::from)?;
+        Ok(())
+    }
+
+    pub fn slice(&self, offset: usize, length: Option<usize>) -> Self {
+        let df = self.df.borrow().slice(
+            offset as i64,
+            length.unwrap_or_else(|| self.df.borrow().height()),
+        );
+        df.into()
+    }
+
     pub fn head(&self, length: Option<usize>) -> Self {
         self.df.borrow().head(length).into()
     }

     pub fn tail(&self, length: Option<usize>) -> Self {
         self.df.borrow().tail(length).into()
     }

+    pub fn is_unique(&self) -> RbResult<RbSeries> {
+        let mask = self.df.borrow().is_unique().map_err(RbPolarsErr::from)?;
+        Ok(mask.into_series().into())
+    }
+
+    pub fn is_duplicated(&self) -> RbResult<RbSeries> {
+        let mask = self
+            .df
+            .borrow()
+            .is_duplicated()
+            .map_err(RbPolarsErr::from)?;
+        Ok(mask.into_series().into())
+    }
+
     pub fn frame_equal(&self, other: &RbDataFrame, null_equal: bool) -> bool {
         if null_equal {
             self.df.borrow().frame_equal_missing(&other.df.borrow())
         } else {
             self.df.borrow().frame_equal(&other.df.borrow())
         }
     }

+    pub fn with_row_count(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
+        let df = self
+            .df
+            .borrow()
+            .with_row_count(&name, offset)
+            .map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+
+    pub fn clone(&self) -> Self {
+        RbDataFrame::new(self.df.borrow().clone())
+    }
+
+    pub fn melt(
+        &self,
+        id_vars: Vec<String>,
+        value_vars: Vec<String>,
+        value_name: Option<String>,
+        variable_name: Option<String>,
+    ) -> RbResult<Self> {
+        let args = MeltArgs {
+            id_vars,
+            value_vars,
+            value_name,
+            variable_name,
+        };
+
+        let df = self.df.borrow().melt2(args).map_err(RbPolarsErr::from)?;
+        Ok(RbDataFrame::new(df))
+    }
+
+    pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
+        let out = if stable {
+            self.df.borrow().partition_by_stable(groups)
+        } else {
+            self.df.borrow().partition_by(groups)
+        }
+        .map_err(RbPolarsErr::from)?;
+        Ok(out.into_iter().map(|v| RbDataFrame::new(v)).collect())
+    }
+
+    pub fn shift(&self, periods: i64) -> Self {
+        self.df.borrow().shift(periods).into()
+    }
+
+    pub fn unique(
+        &self,
+        maintain_order: bool,
+        subset: Option<Vec<String>>,
+        keep: Wrap<UniqueKeepStrategy>,
+    ) -> RbResult<Self> {
+        let subset = subset.as_ref().map(|v| v.as_ref());
+        let df = match maintain_order {
+            true => self.df.borrow().unique_stable(subset, keep.0),
+            false => self.df.borrow().unique(subset, keep.0),
+        }
+        .map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+
     pub fn lazy(&self) -> RbLazyFrame {
         self.df.borrow().clone().lazy().into()
     }

+    pub fn max(&self) -> Self {
+        self.df.borrow().max().into()
+    }
+
+    pub fn min(&self) -> Self {
+        self.df.borrow().min().into()
+    }
+
+    pub fn sum(&self) -> Self {
+        self.df.borrow().sum().into()
+    }
+
     pub fn mean(&self) -> Self {
         self.df.borrow().mean().into()
     }

+    pub fn std(&self, ddof: u8) -> Self {
+        self.df.borrow().std(ddof).into()
+    }
+
+    pub fn var(&self, ddof: u8) -> Self {
+        self.df.borrow().var(ddof).into()
+    }
+
+    pub fn median(&self) -> Self {
+        self.df.borrow().median().into()
+    }
+
+    pub fn hmean(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
+        let s = self
+            .df
+            .borrow()
+            .hmean(null_strategy.0)
+            .map_err(RbPolarsErr::from)?;
+        Ok(s.map(|s| s.into()))
+    }
+
+    pub fn hmax(&self) -> RbResult<Option<RbSeries>> {
+        let s = self.df.borrow().hmax().map_err(RbPolarsErr::from)?;
+        Ok(s.map(|s| s.into()))
+    }
+
+    pub fn hmin(&self) -> RbResult<Option<RbSeries>> {
+        let s = self.df.borrow().hmin().map_err(RbPolarsErr::from)?;
+        Ok(s.map(|s| s.into()))
+    }
+
+    pub fn hsum(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
+        let s = self
+            .df
+            .borrow()
+            .hsum(null_strategy.0)
+            .map_err(RbPolarsErr::from)?;
+        Ok(s.map(|s| s.into()))
+    }
+
+    pub fn quantile(
+        &self,
+        quantile: f64,
+        interpolation: Wrap<QuantileInterpolOptions>,
+    ) -> RbResult<Self> {
+        let df = self
+            .df
+            .borrow()
+            .quantile(quantile, interpolation.0)
+            .map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+
+    pub fn to_dummies(&self, columns: Option<Vec<String>>) -> RbResult<Self> {
+        let df = match columns {
+            Some(cols) => self
+                .df
+                .borrow()
+                .columns_to_dummies(cols.iter().map(|x| x as &str).collect()),
+            None => self.df.borrow().to_dummies(),
+        }
+        .map_err(RbPolarsErr::from)?;
+        Ok(df.into())
+    }
+
     pub fn null_count(&self) -> Self {
         let df = self.df.borrow().null_count();
         df.into()
+    }
+
+    pub fn shrink_to_fit(&self) {
+        self.df.borrow_mut().shrink_to_fit();
+    }
+
+    pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
+        let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
+        if include_header {
+            let s = Utf8Chunked::from_iter_values(
+                &names,
+                self.df.borrow().get_columns().iter().map(|s| s.name()),
+            )
+            .into_series();
+            df.insert_at_idx(0, s).unwrap();
+        }
+        Ok(df.into())
+    }
+
+    pub fn upsample(
+        &self,
+        by: Vec<String>,
+        index_column: String,
+        every: String,
+        offset: String,
+        stable: bool,
+    ) -> RbResult<Self> {
+        let out = if stable {
+            self.df.borrow().upsample_stable(
+                by,
+                &index_column,
+                Duration::parse(&every),
+                Duration::parse(&offset),
+            )
+        } else {
+            self.df.borrow().upsample(
+                by,
+                &index_column,
+                Duration::parse(&every),
+                Duration::parse(&offset),
+            )
+        };
+        let out = out.map_err(RbPolarsErr::from)?;
+        Ok(out.into())
+    }
+
+    pub fn unnest(&self, names: Vec<String>) -> RbResult<Self> {
+        let df = self.df.borrow().unnest(names).map_err(RbPolarsErr::from)?;
+        Ok(df.into())
     }
 }