ext/polars/src/conversion.rs in polars-df-0.1.1 vs ext/polars/src/conversion.rs in polars-df-0.1.2

- old
+ new

@@ -1,30 +1,73 @@ -use magnus::{TryConvert, Value, QNIL}; +use magnus::{RArray, Symbol, TryConvert, Value, QNIL}; use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy}; use polars::datatypes::AnyValue; use polars::frame::DataFrame; use polars::prelude::*; use polars::series::ops::NullBehavior; -use crate::{RbDataFrame, RbPolarsErr, RbResult, RbValueError}; +use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError}; pub struct Wrap<T>(pub T); impl<T> From<T> for Wrap<T> { fn from(t: T) -> Self { Wrap(t) } } +pub fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> { + let seq: RArray = obj.try_convert()?; + let len = seq.len(); + Ok((seq, len)) +} + pub fn get_df(obj: Value) -> RbResult<DataFrame> { let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?; Ok(rbdf.df.borrow().clone()) } -impl Into<Value> for Wrap<AnyValue<'_>> { - fn into(self) -> Value { - match self.0 { +pub fn get_series(obj: Value) -> RbResult<Series> { + let rbs = obj.funcall::<_, _, &RbSeries>("_s", ())?; + Ok(rbs.series.borrow().clone()) +} + +impl TryConvert for Wrap<Utf8Chunked> { + fn try_convert(obj: Value) -> RbResult<Self> { + let (seq, len) = get_rbseq(obj)?; + let mut builder = Utf8ChunkedBuilder::new("", len, len * 25); + + for res in seq.each() { + let item = res?; + match item.try_convert::<String>() { + Ok(val) => builder.append_value(&val), + Err(_) => builder.append_null(), + } + } + Ok(Wrap(builder.finish())) + } +} + +impl TryConvert for Wrap<NullValues> { + fn try_convert(ob: Value) -> RbResult<Self> { + if let Ok(s) = ob.try_convert::<String>() { + Ok(Wrap(NullValues::AllColumnsSingle(s))) + } else if let Ok(s) = ob.try_convert::<Vec<String>>() { + Ok(Wrap(NullValues::AllColumns(s))) + } else if let Ok(s) = ob.try_convert::<Vec<(String, String)>>() { + Ok(Wrap(NullValues::Named(s))) + } else { + Err(RbPolarsErr::other( + "could not extract value from null_values argument".into(), + )) + } + } +} + +impl From<Wrap<AnyValue<'_>>> for Value { + fn from(w: Wrap<AnyValue<'_>>) -> Self { + match w.0 { AnyValue::UInt8(v) => Value::from(v), AnyValue::UInt16(v) => Value::from(v), AnyValue::UInt32(v) => Value::from(v), AnyValue::UInt64(v) => Value::from(v), AnyValue::Int8(v) => Value::from(v), @@ -39,10 +82,16 @@ _ => todo!(), } } } +impl From<Wrap<DataType>> for Value { + fn from(w: Wrap<DataType>) -> Self { + Symbol::from(w.0.to_string()).into() + } +} + impl TryConvert for Wrap<DataType> { fn try_convert(ob: Value) -> RbResult<Self> { let dtype = match ob.try_convert::<String>()?.as_str() { "u8" => DataType::UInt8, "u16" => DataType::UInt16, @@ -116,10 +165,43 @@ }; Ok(Wrap(parsed)) } } +impl TryConvert for Wrap<CsvEncoding> { + fn try_convert(ob: Value) -> RbResult<Self> { + let parsed = match ob.try_convert::<String>()?.as_str() { + "utf8" => CsvEncoding::Utf8, + "utf8-lossy" => CsvEncoding::LossyUtf8, + v => { + return Err(RbValueError::new_err(format!( + "encoding must be one of {{'utf8', 'utf8-lossy'}}, got {}", + v + ))) + } + }; + Ok(Wrap(parsed)) + } +} + +impl TryConvert for Wrap<Option<IpcCompression>> { + fn try_convert(ob: Value) -> RbResult<Self> { + let parsed = match ob.try_convert::<String>()?.as_str() { + "uncompressed" => None, + "lz4" => Some(IpcCompression::LZ4), + "zstd" => Some(IpcCompression::ZSTD), + v => { + return Err(RbValueError::new_err(format!( + "compression must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {}", + v + ))) + } + }; + Ok(Wrap(parsed)) + } +} + impl TryConvert for Wrap<JoinType> { fn try_convert(ob: Value) -> RbResult<Self> { let parsed = match ob.try_convert::<String>()?.as_str() { "inner" => JoinType::Inner, "left" => JoinType::Left, @@ -161,9 +243,27 @@ "ignore" => NullStrategy::Ignore, "propagate" => NullStrategy::Propagate, v => { return Err(RbValueError::new_err(format!( "null strategy must be one of {{'ignore', 'propagate'}}, got {}", + v + ))) + } + }; + Ok(Wrap(parsed)) + } +} + +impl TryConvert for Wrap<ParallelStrategy> { + fn try_convert(ob: Value) -> RbResult<Self> { + let parsed = match ob.try_convert::<String>()?.as_str() { + "auto" => ParallelStrategy::Auto, + "columns" => ParallelStrategy::Columns, + "row_groups" => ParallelStrategy::RowGroups, + "none" => ParallelStrategy::None, + v => { + return Err(RbValueError::new_err(format!( + "parallel must be one of {{'auto', 'columns', 'row_groups', 'none'}}, got {}", v ))) } }; Ok(Wrap(parsed))