ext/polars/src/dataframe.rs in polars-df-0.1.0 vs ext/polars/src/dataframe.rs in polars-df-0.1.1
- old
+ new
@@ -5,12 +5,13 @@
use std::fs::File;
use std::io::{BufReader, BufWriter, Cursor};
use std::ops::Deref;
use std::path::PathBuf;
-use crate::conversion::parse_parquet_compression;
+use crate::conversion::*;
use crate::file::{get_file_like, get_mmap_bytes_reader};
+use crate::series::to_rbseries_collection;
use crate::{series, RbLazyFrame, RbPolarsErr, RbResult, RbSeries};
#[magnus::wrap(class = "Polars::RbDataFrame")]
pub struct RbDataFrame {
pub df: RefCell<DataFrame>,
@@ -36,10 +37,14 @@
}
let df = DataFrame::new(cols).map_err(RbPolarsErr::from)?;
Ok(RbDataFrame::new(df))
}
+ /// Returns the value reported by `DataFrame::estimated_size` for the wrapped frame.
+ pub fn estimated_size(&self) -> usize {
+     let frame = self.df.borrow();
+     frame.estimated_size()
+ }
+
pub fn read_csv(rb_f: Value, has_header: bool) -> RbResult<Self> {
let mmap_bytes_r = get_mmap_bytes_reader(rb_f)?;
let df = CsvReader::new(mmap_bytes_r)
.has_header(has_header)
.finish()
@@ -211,27 +216,45 @@
/// Renders the wrapped frame through its `Display` implementation.
pub fn to_s(&self) -> String {
    self.df.borrow().to_string()
}
+ /// Clones the wrapped frame's column vector and converts it with
+ /// `to_rbseries_collection`.
+ pub fn get_columns(&self) -> Vec<RbSeries> {
+     let frame = self.df.borrow();
+     to_rbseries_collection(frame.get_columns().clone())
+ }
+
/// Collects the wrapped frame's column names into owned strings.
pub fn columns(&self) -> Vec<String> {
    let frame = self.df.borrow();
    let mut names = Vec::new();
    for name in frame.get_column_names().iter() {
        names.push(name.to_string());
    }
    names
}
+ /// Applies `names` to the wrapped frame in place via `DataFrame::set_column_names`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the underlying call fails.
+ pub fn set_column_names(&self, names: Vec<String>) -> RbResult<()> {
+     let mut frame = self.df.borrow_mut();
+     frame.set_column_names(&names).map_err(RbPolarsErr::from)?;
+     Ok(())
+ }
+
/// Collects each column's dtype, formatted with `to_string`.
pub fn dtypes(&self) -> Vec<String> {
    let frame = self.df.borrow();
    let mut dtype_names = Vec::new();
    for col in frame.iter() {
        dtype_names.push(col.dtype().to_string());
    }
    dtype_names
}
+ /// Returns the chunk count reported by `DataFrame::n_chunks`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the underlying call fails.
+ pub fn n_chunks(&self) -> RbResult<usize> {
+     let frame = self.df.borrow();
+     let chunk_count = frame.n_chunks().map_err(RbPolarsErr::from)?;
+     Ok(chunk_count)
+ }
+
/// Returns the tuple reported by `DataFrame::shape` for the wrapped frame.
pub fn shape(&self) -> (usize, usize) {
    let frame = self.df.borrow();
    frame.shape()
}
pub fn height(&self) -> usize {
@@ -256,10 +279,32 @@
.column(&name)
.map(|v| v.clone().into())
.map_err(RbPolarsErr::from)
}
+ /// Runs `DataFrame::select` with `selection` and wraps the result in a new `RbDataFrame`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the selection fails.
+ pub fn select(&self, selection: Vec<String>) -> RbResult<Self> {
+     let frame = self.df.borrow();
+     let selected = frame.select(selection).map_err(RbPolarsErr::from)?;
+     Ok(RbDataFrame::new(selected))
+ }
+
+ /// Builds an `IdxCa` with an empty name from `indices` and passes it to `DataFrame::take`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the take fails.
+ pub fn take(&self, indices: Vec<IdxSize>) -> RbResult<Self> {
+     let idx_ca = IdxCa::from_vec("", indices);
+     let frame = self.df.borrow();
+     let taken = frame.take(&idx_ca).map_err(RbPolarsErr::from)?;
+     Ok(RbDataFrame::new(taken))
+ }
+
+ /// Views `indices` as an index array (`Series::idx`) and passes it to `DataFrame::take`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when `idx()` or the take itself fails.
+ pub fn take_with_series(&self, indices: &RbSeries) -> RbResult<Self> {
+     // Keep the series borrow alive for as long as the index view is in use.
+     let index_series = indices.series.borrow();
+     let idx_ca = index_series.idx().map_err(RbPolarsErr::from)?;
+     let taken = self.df.borrow().take(idx_ca).map_err(RbPolarsErr::from)?;
+     Ok(RbDataFrame::new(taken))
+ }
+
pub fn sort(&self, by_column: String, reverse: bool, nulls_last: bool) -> RbResult<Self> {
let df = self
.df
.borrow()
.sort_with_options(
@@ -271,34 +316,266 @@
)
.map_err(RbPolarsErr::from)?;
Ok(RbDataFrame::new(df))
}
+ /// Calls `DataFrame::replace` on the wrapped frame with `column` and a clone of `new_col`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the replacement fails.
+ pub fn replace(&self, column: String, new_col: &RbSeries) -> RbResult<()> {
+     let mut frame = self.df.borrow_mut();
+     let replacement = new_col.series.borrow().clone();
+     frame
+         .replace(&column, replacement)
+         .map_err(RbPolarsErr::from)?;
+     Ok(())
+ }
+
+ /// Calls `DataFrame::replace_at_idx` on the wrapped frame with `index` and a clone of
+ /// `new_col`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the replacement fails.
+ pub fn replace_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
+     let mut frame = self.df.borrow_mut();
+     let replacement = new_col.series.borrow().clone();
+     frame
+         .replace_at_idx(index, replacement)
+         .map_err(RbPolarsErr::from)?;
+     Ok(())
+ }
+
+ /// Calls `DataFrame::insert_at_idx` on the wrapped frame with `index` and a clone of
+ /// `new_col`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the insertion fails.
+ pub fn insert_at_idx(&self, index: usize, new_col: &RbSeries) -> RbResult<()> {
+     let mut frame = self.df.borrow_mut();
+     let addition = new_col.series.borrow().clone();
+     frame
+         .insert_at_idx(index, addition)
+         .map_err(RbPolarsErr::from)?;
+     Ok(())
+ }
+
+ /// Slices the wrapped frame starting at `offset`; when `length` is `None` the frame's
+ /// height is used as the length.
+ pub fn slice(&self, offset: usize, length: Option<usize>) -> Self {
+     let frame = self.df.borrow();
+     let len = match length {
+         Some(len) => len,
+         None => frame.height(),
+     };
+     // `DataFrame::slice` takes a signed offset, hence the cast.
+     frame.slice(offset as i64, len).into()
+ }
+
/// Wraps the result of `DataFrame::head(length)` in a new `RbDataFrame`.
pub fn head(&self, length: Option<usize>) -> Self {
    let frame = self.df.borrow();
    frame.head(length).into()
}
/// Wraps the result of `DataFrame::tail(length)` in a new `RbDataFrame`.
pub fn tail(&self, length: Option<usize>) -> Self {
    let frame = self.df.borrow();
    frame.tail(length).into()
}
+ /// Returns the mask produced by `DataFrame::is_unique`, converted to an `RbSeries`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the mask cannot be computed.
+ pub fn is_unique(&self) -> RbResult<RbSeries> {
+     let frame = self.df.borrow();
+     let unique_mask = frame.is_unique().map_err(RbPolarsErr::from)?;
+     Ok(unique_mask.into_series().into())
+ }
+
+ /// Returns the mask produced by `DataFrame::is_duplicated`, converted to an `RbSeries`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the mask cannot be computed.
+ pub fn is_duplicated(&self) -> RbResult<RbSeries> {
+     let frame = self.df.borrow();
+     let duplicate_mask = frame.is_duplicated().map_err(RbPolarsErr::from)?;
+     Ok(duplicate_mask.into_series().into())
+ }
+
/// Compares the wrapped frame with `other`.
///
/// Uses `frame_equal_missing` when `null_equal` is true and `frame_equal` otherwise.
pub fn frame_equal(&self, other: &RbDataFrame, null_equal: bool) -> bool {
    let lhs = self.df.borrow();
    let rhs = other.df.borrow();
    if !null_equal {
        return lhs.frame_equal(&rhs);
    }
    lhs.frame_equal_missing(&rhs)
}
+ /// Wraps the result of `DataFrame::with_row_count(&name, offset)` in a new `RbDataFrame`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the underlying call fails.
+ pub fn with_row_count(&self, name: String, offset: Option<IdxSize>) -> RbResult<Self> {
+     let frame = self.df.borrow();
+     let counted = frame
+         .with_row_count(&name, offset)
+         .map_err(RbPolarsErr::from)?;
+     Ok(counted.into())
+ }
+
+ /// Creates a new `RbDataFrame` from a clone of the wrapped frame.
+ pub fn clone(&self) -> Self {
+     let copy = self.df.borrow().clone();
+     RbDataFrame::new(copy)
+ }
+
+ /// Packs the arguments into `MeltArgs` and runs `DataFrame::melt2` on the wrapped frame.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the melt fails.
+ pub fn melt(
+     &self,
+     id_vars: Vec<String>,
+     value_vars: Vec<String>,
+     value_name: Option<String>,
+     variable_name: Option<String>,
+ ) -> RbResult<Self> {
+     let frame = self.df.borrow();
+     let melted = frame
+         .melt2(MeltArgs {
+             id_vars,
+             value_vars,
+             value_name,
+             variable_name,
+         })
+         .map_err(RbPolarsErr::from)?;
+     Ok(RbDataFrame::new(melted))
+ }
+
+ /// Partitions the wrapped frame by `groups` and wraps every resulting frame.
+ ///
+ /// Calls `partition_by_stable` when `stable` is true and `partition_by` otherwise.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when partitioning fails.
+ pub fn partition_by(&self, groups: Vec<String>, stable: bool) -> RbResult<Vec<Self>> {
+     let frame = self.df.borrow();
+     let partitioned = if stable {
+         frame.partition_by_stable(groups)
+     } else {
+         frame.partition_by(groups)
+     };
+     let parts = partitioned.map_err(RbPolarsErr::from)?;
+     Ok(parts.into_iter().map(RbDataFrame::new).collect())
+ }
+
+ /// Wraps the result of `DataFrame::shift(periods)` in a new `RbDataFrame`.
+ pub fn shift(&self, periods: i64) -> Self {
+     let frame = self.df.borrow();
+     frame.shift(periods).into()
+ }
+
+ /// Deduplicates the wrapped frame, optionally restricted to the `subset` columns.
+ ///
+ /// Calls `unique_stable` when `maintain_order` is true and `unique` otherwise, passing
+ /// the unwrapped `keep` strategy in both cases.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the underlying call fails.
+ pub fn unique(
+     &self,
+     maintain_order: bool,
+     subset: Option<Vec<String>>,
+     keep: Wrap<UniqueKeepStrategy>,
+ ) -> RbResult<Self> {
+     // Borrow the optional column list as a slice for the Polars call.
+     let subset = subset.as_deref();
+     let frame = self.df.borrow();
+     let deduped = if maintain_order {
+         frame.unique_stable(subset, keep.0)
+     } else {
+         frame.unique(subset, keep.0)
+     };
+     let deduped = deduped.map_err(RbPolarsErr::from)?;
+     Ok(deduped.into())
+ }
+
/// Clones the wrapped frame and converts the clone into an `RbLazyFrame`.
pub fn lazy(&self) -> RbLazyFrame {
    let owned = self.df.borrow().clone();
    owned.lazy().into()
}
+ /// Wraps the result of `DataFrame::max` in a new `RbDataFrame`.
+ pub fn max(&self) -> Self {
+     let frame = self.df.borrow();
+     frame.max().into()
+ }
+
+ /// Wraps the result of `DataFrame::min` in a new `RbDataFrame`.
+ pub fn min(&self) -> Self {
+     let frame = self.df.borrow();
+     frame.min().into()
+ }
+
+ /// Wraps the result of `DataFrame::sum` in a new `RbDataFrame`.
+ pub fn sum(&self) -> Self {
+     let frame = self.df.borrow();
+     frame.sum().into()
+ }
+
/// Wraps the result of `DataFrame::mean` in a new `RbDataFrame`.
pub fn mean(&self) -> Self {
    let frame = self.df.borrow();
    frame.mean().into()
}
+ /// Wraps the result of `DataFrame::std(ddof)` in a new `RbDataFrame`.
+ pub fn std(&self, ddof: u8) -> Self {
+     let frame = self.df.borrow();
+     frame.std(ddof).into()
+ }
+
+ /// Wraps the result of `DataFrame::var(ddof)` in a new `RbDataFrame`.
+ pub fn var(&self, ddof: u8) -> Self {
+     let frame = self.df.borrow();
+     frame.var(ddof).into()
+ }
+
+ /// Wraps the result of `DataFrame::median` in a new `RbDataFrame`.
+ pub fn median(&self) -> Self {
+     let frame = self.df.borrow();
+     frame.median().into()
+ }
+
+ /// Runs `DataFrame::hmean` with the unwrapped `null_strategy`; a returned series, if
+ /// any, is converted to `RbSeries`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the underlying call fails.
+ pub fn hmean(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
+     let frame = self.df.borrow();
+     let maybe_series = frame.hmean(null_strategy.0).map_err(RbPolarsErr::from)?;
+     Ok(maybe_series.map(Into::into))
+ }
+
+ /// Runs `DataFrame::hmax`; a returned series, if any, is converted to `RbSeries`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the underlying call fails.
+ pub fn hmax(&self) -> RbResult<Option<RbSeries>> {
+     let frame = self.df.borrow();
+     let maybe_series = frame.hmax().map_err(RbPolarsErr::from)?;
+     Ok(maybe_series.map(Into::into))
+ }
+
+ /// Runs `DataFrame::hmin`; a returned series, if any, is converted to `RbSeries`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the underlying call fails.
+ pub fn hmin(&self) -> RbResult<Option<RbSeries>> {
+     let frame = self.df.borrow();
+     let maybe_series = frame.hmin().map_err(RbPolarsErr::from)?;
+     Ok(maybe_series.map(Into::into))
+ }
+
+ /// Runs `DataFrame::hsum` with the unwrapped `null_strategy`; a returned series, if
+ /// any, is converted to `RbSeries`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the underlying call fails.
+ pub fn hsum(&self, null_strategy: Wrap<NullStrategy>) -> RbResult<Option<RbSeries>> {
+     let frame = self.df.borrow();
+     let maybe_series = frame.hsum(null_strategy.0).map_err(RbPolarsErr::from)?;
+     Ok(maybe_series.map(Into::into))
+ }
+
+ /// Runs `DataFrame::quantile` with `quantile` and the unwrapped interpolation option,
+ /// wrapping the result in a new `RbDataFrame`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the underlying call fails.
+ pub fn quantile(
+     &self,
+     quantile: f64,
+     interpolation: Wrap<QuantileInterpolOptions>,
+ ) -> RbResult<Self> {
+     let frame = self.df.borrow();
+     let result = frame
+         .quantile(quantile, interpolation.0)
+         .map_err(RbPolarsErr::from)?;
+     Ok(result.into())
+ }
+
+ /// Produces dummy columns for the wrapped frame.
+ ///
+ /// With `Some(columns)` only those names are passed to `columns_to_dummies`; with
+ /// `None` the whole frame goes through `to_dummies`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the underlying call fails.
+ pub fn to_dummies(&self, columns: Option<Vec<String>>) -> RbResult<Self> {
+     let frame = self.df.borrow();
+     let encoded = match columns {
+         None => frame.to_dummies(),
+         Some(cols) => frame.columns_to_dummies(cols.iter().map(String::as_str).collect()),
+     };
+     let encoded = encoded.map_err(RbPolarsErr::from)?;
+     Ok(encoded.into())
+ }
+
/// Wraps the result of `DataFrame::null_count` in a new `RbDataFrame`.
pub fn null_count(&self) -> Self {
    self.df.borrow().null_count().into()
+ }
+
+ /// Calls `DataFrame::shrink_to_fit` on the wrapped frame in place.
+ pub fn shrink_to_fit(&self) {
+     let mut frame = self.df.borrow_mut();
+     frame.shrink_to_fit();
+ }
+
+ /// Transposes the wrapped frame, optionally prepending the original column names.
+ ///
+ /// When `include_header` is true, a Utf8 series named `names` that holds the original
+ /// column names is inserted at index 0 of the transposed frame.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the transpose fails or when the header
+ /// column cannot be inserted.
+ pub fn transpose(&self, include_header: bool, names: String) -> RbResult<Self> {
+     let mut df = self.df.borrow().transpose().map_err(RbPolarsErr::from)?;
+     if include_header {
+         let s = Utf8Chunked::from_iter_values(
+             &names,
+             self.df.borrow().get_columns().iter().map(|s| s.name()),
+         )
+         .into_series();
+         // `names` comes from the caller, so report an insertion failure as an error
+         // instead of panicking.
+         df.insert_at_idx(0, s).map_err(RbPolarsErr::from)?;
+     }
+     Ok(df.into())
+ }
+
+ /// Upsamples the wrapped frame over `index_column`, grouped by `by`.
+ ///
+ /// `every` and `offset` are converted with `Duration::parse`. Calls `upsample_stable`
+ /// when `stable` is true and `upsample` otherwise.
+ ///
+ /// NOTE(review): `Duration::parse` is used here without any error handling; confirm
+ /// how it behaves for malformed `every`/`offset` strings.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the upsample call fails.
+ pub fn upsample(
+     &self,
+     by: Vec<String>,
+     index_column: String,
+     every: String,
+     offset: String,
+     stable: bool,
+ ) -> RbResult<Self> {
+     // Both branches need the same parsed durations, so parse them once up front.
+     let every = Duration::parse(&every);
+     let offset = Duration::parse(&offset);
+     let frame = self.df.borrow();
+     let upsampled = if stable {
+         frame.upsample_stable(by, &index_column, every, offset)
+     } else {
+         frame.upsample(by, &index_column, every, offset)
+     };
+     let upsampled = upsampled.map_err(RbPolarsErr::from)?;
+     Ok(upsampled.into())
+ }
+
+ /// Wraps the result of `DataFrame::unnest(names)` in a new `RbDataFrame`.
+ ///
+ /// # Errors
+ /// Returns the converted Polars error when the underlying call fails.
+ pub fn unnest(&self, names: Vec<String>) -> RbResult<Self> {
+     let frame = self.df.borrow();
+     let unnested = frame.unnest(names).map_err(RbPolarsErr::from)?;
+     Ok(unnested.into())
}
}