use magnus::{class, RArray, Symbol, TryConvert, Value, QNIL}; use polars::chunked_array::object::PolarsObjectSafe; use polars::chunked_array::ops::{FillNullLimit, FillNullStrategy}; use polars::datatypes::AnyValue; use polars::frame::DataFrame; use polars::prelude::*; use polars::series::ops::NullBehavior; use std::fmt::{Display, Formatter}; use std::hash::{Hash, Hasher}; use crate::{RbDataFrame, RbPolarsErr, RbResult, RbSeries, RbValueError}; pub struct Wrap(pub T); impl From for Wrap { fn from(t: T) -> Self { Wrap(t) } } pub fn get_rbseq(obj: Value) -> RbResult<(RArray, usize)> { let seq: RArray = obj.try_convert()?; let len = seq.len(); Ok((seq, len)) } pub fn get_df(obj: Value) -> RbResult { let rbdf = obj.funcall::<_, _, &RbDataFrame>("_df", ())?; Ok(rbdf.df.borrow().clone()) } pub fn get_series(obj: Value) -> RbResult { let rbs = obj.funcall::<_, _, &RbSeries>("_s", ())?; Ok(rbs.series.borrow().clone()) } impl TryConvert for Wrap { fn try_convert(obj: Value) -> RbResult { let (seq, len) = get_rbseq(obj)?; let mut builder = Utf8ChunkedBuilder::new("", len, len * 25); for res in seq.each() { let item = res?; match item.try_convert::() { Ok(val) => builder.append_value(&val), Err(_) => builder.append_null(), } } Ok(Wrap(builder.finish())) } } impl TryConvert for Wrap { fn try_convert(ob: Value) -> RbResult { if let Ok(s) = ob.try_convert::() { Ok(Wrap(NullValues::AllColumnsSingle(s))) } else if let Ok(s) = ob.try_convert::>() { Ok(Wrap(NullValues::AllColumns(s))) } else if let Ok(s) = ob.try_convert::>() { Ok(Wrap(NullValues::Named(s))) } else { Err(RbPolarsErr::other( "could not extract value from null_values argument".into(), )) } } } impl From>> for Value { fn from(w: Wrap>) -> Self { match w.0 { AnyValue::UInt8(v) => Value::from(v), AnyValue::UInt16(v) => Value::from(v), AnyValue::UInt32(v) => Value::from(v), AnyValue::UInt64(v) => Value::from(v), AnyValue::Int8(v) => Value::from(v), AnyValue::Int16(v) => Value::from(v), AnyValue::Int32(v) => Value::from(v), AnyValue::Int64(v) => Value::from(v), AnyValue::Float32(v) => Value::from(v), AnyValue::Float64(v) => Value::from(v), AnyValue::Null => *QNIL, AnyValue::Boolean(v) => Value::from(v), AnyValue::Utf8(v) => Value::from(v), AnyValue::Date(v) => class::time() .funcall::<_, _, Value>("at", (v * 86400,)) .unwrap() .funcall::<_, _, Value>("utc", ()) .unwrap() .funcall::<_, _, Value>("to_date", ()) .unwrap(), AnyValue::Datetime(v, tu, tz) => { let t = match tu { TimeUnit::Nanoseconds => todo!(), TimeUnit::Microseconds => { let sec = v / 1000000; let subsec = v % 1000000; class::time() .funcall::<_, _, Value>("at", (sec, subsec, Symbol::new("usec"))) .unwrap() } TimeUnit::Milliseconds => todo!(), }; if tz.is_some() { todo!(); } else { t.funcall::<_, _, Value>("utc", ()).unwrap() } } _ => todo!(), } } } impl From> for Value { fn from(w: Wrap) -> Self { Symbol::from(w.0.to_string()).into() } } impl TryConvert for Wrap { fn try_convert(ob: Value) -> RbResult { let dtype = match ob.try_convert::()?.as_str() { "u8" => DataType::UInt8, "u16" => DataType::UInt16, "u32" => DataType::UInt32, "u64" => DataType::UInt64, "i8" => DataType::Int8, "i16" => DataType::Int16, "i32" => DataType::Int32, "i64" => DataType::Int64, "str" => DataType::Utf8, "bin" => DataType::Binary, "bool" => DataType::Boolean, "cat" => DataType::Categorical(None), "date" => DataType::Date, "datetime" => DataType::Datetime(TimeUnit::Microseconds, None), "f32" => DataType::Float32, "time" => DataType::Time, "dur" => DataType::Duration(TimeUnit::Microseconds), "f64" => DataType::Float64, // "obj" => DataType::Object(OBJECT_NAME), "list" => DataType::List(Box::new(DataType::Boolean)), "null" => DataType::Null, "unk" => DataType::Unknown, _ => { return Err(RbValueError::new_err(format!( "{} is not a supported DataType.", ob ))) } }; Ok(Wrap(dtype)) } } impl<'s> TryConvert for Wrap> { fn try_convert(ob: Value) -> RbResult { // TODO improve if let Ok(v) = ob.try_convert::() { Ok(AnyValue::Int64(v).into()) } else if let Ok(v) = ob.try_convert::() { Ok(AnyValue::Float64(v).into()) } else { Err(RbPolarsErr::other(format!( "object type not supported {:?}", ob ))) } } } impl TryConvert for Wrap { fn try_convert(ob: Value) -> RbResult { let parsed = match ob.try_convert::()?.as_str() { "physical" => CategoricalOrdering::Physical, "lexical" => CategoricalOrdering::Lexical, v => { return Err(RbValueError::new_err(format!( "ordering must be one of {{'physical', 'lexical'}}, got {}", v ))) } }; Ok(Wrap(parsed)) } } impl TryConvert for Wrap { fn try_convert(ob: Value) -> RbResult { let parsed = match ob.try_convert::()?.as_str() { "left" => ClosedWindow::Left, "right" => ClosedWindow::Right, "both" => ClosedWindow::Both, "none" => ClosedWindow::None, v => { return Err(RbValueError::new_err(format!( "closed must be one of {{'left', 'right', 'both', 'none'}}, got {}", v ))) } }; Ok(Wrap(parsed)) } } impl TryConvert for Wrap { fn try_convert(ob: Value) -> RbResult { let parsed = match ob.try_convert::()?.as_str() { "utf8" => CsvEncoding::Utf8, "utf8-lossy" => CsvEncoding::LossyUtf8, v => { return Err(RbValueError::new_err(format!( "encoding must be one of {{'utf8', 'utf8-lossy'}}, got {}", v ))) } }; Ok(Wrap(parsed)) } } impl TryConvert for Wrap> { fn try_convert(ob: Value) -> RbResult { let parsed = match ob.try_convert::()?.as_str() { "uncompressed" => None, "lz4" => Some(IpcCompression::LZ4), "zstd" => Some(IpcCompression::ZSTD), v => { return Err(RbValueError::new_err(format!( "compression must be one of {{'uncompressed', 'lz4', 'zstd'}}, got {}", v ))) } }; Ok(Wrap(parsed)) } } impl TryConvert for Wrap { fn try_convert(ob: Value) -> RbResult { let parsed = match ob.try_convert::()?.as_str() { "inner" => JoinType::Inner, "left" => JoinType::Left, "outer" => JoinType::Outer, "semi" => JoinType::Semi, "anti" => JoinType::Anti, // #[cfg(feature = "cross_join")] // "cross" => JoinType::Cross, v => { return Err(RbValueError::new_err(format!( "how must be one of {{'inner', 'left', 'outer', 'semi', 'anti', 'cross'}}, got {}", v ))) } }; Ok(Wrap(parsed)) } } impl TryConvert for Wrap { fn try_convert(ob: Value) -> RbResult { let parsed = match ob.try_convert::()?.as_str() { "first_non_null" => ListToStructWidthStrategy::FirstNonNull, "max_width" => ListToStructWidthStrategy::MaxWidth, v => { return Err(RbValueError::new_err(format!( "n_field_strategy must be one of {{'first_non_null', 'max_width'}}, got {}", v ))) } }; Ok(Wrap(parsed)) } } impl TryConvert for Wrap { fn try_convert(ob: Value) -> RbResult { let parsed = match ob.try_convert::()?.as_str() { "drop" => NullBehavior::Drop, "ignore" => NullBehavior::Ignore, v => { return Err(RbValueError::new_err(format!( "null behavior must be one of {{'drop', 'ignore'}}, got {}", v ))) } }; Ok(Wrap(parsed)) } } impl TryConvert for Wrap { fn try_convert(ob: Value) -> RbResult { let parsed = match ob.try_convert::()?.as_str() { "ignore" => NullStrategy::Ignore, "propagate" => NullStrategy::Propagate, v => { return Err(RbValueError::new_err(format!( "null strategy must be one of {{'ignore', 'propagate'}}, got {}", v ))) } }; Ok(Wrap(parsed)) } } impl TryConvert for Wrap { fn try_convert(ob: Value) -> RbResult { let parsed = match ob.try_convert::()?.as_str() { "auto" => ParallelStrategy::Auto, "columns" => ParallelStrategy::Columns, "row_groups" => ParallelStrategy::RowGroups, "none" => ParallelStrategy::None, v => { return Err(RbValueError::new_err(format!( "parallel must be one of {{'auto', 'columns', 'row_groups', 'none'}}, got {}", v ))) } }; Ok(Wrap(parsed)) } } impl TryConvert for Wrap { fn try_convert(ob: Value) -> RbResult { let parsed = match ob.try_convert::()?.as_str() { "lower" => QuantileInterpolOptions::Lower, "higher" => QuantileInterpolOptions::Higher, "nearest" => QuantileInterpolOptions::Nearest, "linear" => QuantileInterpolOptions::Linear, "midpoint" => QuantileInterpolOptions::Midpoint, v => { return Err(RbValueError::new_err(format!( "interpolation must be one of {{'lower', 'higher', 'nearest', 'linear', 'midpoint'}}, got {}", v ))) } }; Ok(Wrap(parsed)) } } impl TryConvert for Wrap { fn try_convert(ob: Value) -> RbResult { let parsed = match ob.try_convert::()?.as_str() { "min" => RankMethod::Min, "max" => RankMethod::Max, "average" => RankMethod::Average, "dense" => RankMethod::Dense, "ordinal" => RankMethod::Ordinal, "random" => RankMethod::Random, v => { return Err(RbValueError::new_err(format!( "method must be one of {{'min', 'max', 'average', 'dense', 'ordinal', 'random'}}, got {}", v ))) } }; Ok(Wrap(parsed)) } } impl TryConvert for Wrap { fn try_convert(ob: Value) -> RbResult { let parsed = match ob.try_convert::()?.as_str() { "ns" => TimeUnit::Nanoseconds, "us" => TimeUnit::Microseconds, "ms" => TimeUnit::Milliseconds, v => { return Err(RbValueError::new_err(format!( "time unit must be one of {{'ns', 'us', 'ms'}}, got {}", v ))) } }; Ok(Wrap(parsed)) } } impl TryConvert for Wrap { fn try_convert(ob: Value) -> RbResult { let parsed = match ob.try_convert::()?.as_str() { "first" => UniqueKeepStrategy::First, "last" => UniqueKeepStrategy::Last, v => { return Err(RbValueError::new_err(format!( "keep must be one of {{'first', 'last'}}, got {}", v ))) } }; Ok(Wrap(parsed)) } } pub fn parse_fill_null_strategy( strategy: &str, limit: FillNullLimit, ) -> RbResult { let parsed = match strategy { "forward" => FillNullStrategy::Forward(limit), "backward" => FillNullStrategy::Backward(limit), "min" => FillNullStrategy::Min, "max" => FillNullStrategy::Max, "mean" => FillNullStrategy::Mean, "zero" => FillNullStrategy::Zero, "one" => FillNullStrategy::One, e => { return Err(magnus::Error::runtime_error(format!( "strategy must be one of {{'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'}}, got {}", e, ))) } }; Ok(parsed) } pub fn parse_parquet_compression( compression: &str, compression_level: Option, ) -> RbResult { let parsed = match compression { "uncompressed" => ParquetCompression::Uncompressed, "snappy" => ParquetCompression::Snappy, "gzip" => ParquetCompression::Gzip( compression_level .map(|lvl| { GzipLevel::try_new(lvl as u8) .map_err(|e| RbValueError::new_err(format!("{:?}", e))) }) .transpose()?, ), "lzo" => ParquetCompression::Lzo, "brotli" => ParquetCompression::Brotli( compression_level .map(|lvl| { BrotliLevel::try_new(lvl as u32) .map_err(|e| RbValueError::new_err(format!("{:?}", e))) }) .transpose()?, ), "lz4" => ParquetCompression::Lz4Raw, "zstd" => ParquetCompression::Zstd( compression_level .map(|lvl| { ZstdLevel::try_new(lvl) .map_err(|e| RbValueError::new_err(format!("{:?}", e))) }) .transpose()?, ), e => { return Err(RbValueError::new_err(format!( "compression must be one of {{'uncompressed', 'snappy', 'gzip', 'lzo', 'brotli', 'lz4', 'zstd'}}, got {}", e ))) } }; Ok(parsed) } #[derive(Clone, Debug)] pub struct ObjectValue { pub inner: Value, } impl Hash for ObjectValue { fn hash(&self, state: &mut H) { let h = self .inner .funcall::<_, _, isize>("hash", ()) .expect("should be hashable"); state.write_isize(h) } } impl Eq for ObjectValue {} impl PartialEq for ObjectValue { fn eq(&self, other: &Self) -> bool { self.inner.eql(&other.inner).unwrap_or(false) } } impl Display for ObjectValue { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { write!(f, "{}", self.inner) } } impl PolarsObject for ObjectValue { fn type_name() -> &'static str { "object" } } impl From for ObjectValue { fn from(v: Value) -> Self { Self { inner: v } } } impl From<&dyn PolarsObjectSafe> for &ObjectValue { fn from(val: &dyn PolarsObjectSafe) -> Self { unsafe { &*(val as *const dyn PolarsObjectSafe as *const ObjectValue) } } } impl ObjectValue { pub fn to_object(&self) -> Value { self.inner } } impl Default for ObjectValue { fn default() -> Self { ObjectValue { inner: *QNIL } } }