lib/polars/lazy_frame.rb in polars-df-0.1.1 vs lib/polars/lazy_frame.rb in polars-df-0.1.2
- old
+ new
@@ -1,15 +1,159 @@
module Polars
+ # Representation of a Lazy computation graph/query against a DataFrame.
class LazyFrame
+ # @private
attr_accessor :_ldf
+ # @private
def self._from_rbldf(rb_ldf)
ldf = LazyFrame.allocate
ldf._ldf = rb_ldf
ldf
end
+ # @private
+ def self._scan_csv(
+ file,
+ has_header: true,
+ sep: ",",
+ comment_char: nil,
+ quote_char: '"',
+ skip_rows: 0,
+ dtypes: nil,
+ null_values: nil,
+ ignore_errors: false,
+ cache: true,
+ with_column_names: nil,
+ infer_schema_length: 100,
+ n_rows: nil,
+ encoding: "utf8",
+ low_memory: false,
+ rechunk: true,
+ skip_rows_after_header: 0,
+ row_count_name: nil,
+ row_count_offset: 0,
+ parse_dates: false,
+ eol_char: "\n"
+ )
+ dtype_list = nil
+ if !dtypes.nil?
+ dtype_list = []
+ dtypes.each do |k, v|
+ dtype_list << [k, Utils.rb_type_to_dtype(v)]
+ end
+ end
+ processed_null_values = Utils._process_null_values(null_values)
+
+ _from_rbldf(
+ RbLazyFrame.new_from_csv(
+ file,
+ sep,
+ has_header,
+ ignore_errors,
+ skip_rows,
+ n_rows,
+ cache,
+ dtype_list,
+ low_memory,
+ comment_char,
+ quote_char,
+ processed_null_values,
+ infer_schema_length,
+ with_column_names,
+ rechunk,
+ skip_rows_after_header,
+ encoding,
+ Utils._prepare_row_count_args(row_count_name, row_count_offset),
+ parse_dates,
+ eol_char
+ )
+ )
+ end
+
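A minimal usage sketch for the new CSV scan constructor. _scan_csv is marked @private, so applications would normally reach it through a public wrapper such as Polars.scan_csv (assumed here, not part of this diff); the file name below is hypothetical.

    # Build a lazy CSV scan, then materialize it with collect.
    ldf = Polars::LazyFrame._scan_csv(
      "data.csv",                # hypothetical path
      has_header: true,
      null_values: ["", "NA"],
      parse_dates: true
    )
    df = ldf.collect             # executes the plan and returns a DataFrame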
+ # @private
+ def self._scan_parquet(
+ file,
+ n_rows: nil,
+ cache: true,
+ parallel: "auto",
+ rechunk: true,
+ row_count_name: nil,
+ row_count_offset: 0,
+ storage_options: nil,
+ low_memory: false
+ )
+ _from_rbldf(
+ RbLazyFrame.new_from_parquet(
+ file,
+ n_rows,
+ cache,
+ parallel,
+ rechunk,
+ Utils._prepare_row_count_args(row_count_name, row_count_offset),
+ low_memory
+ )
+ )
+ end
+
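The Parquet counterpart follows the same pattern; a sketch calling the private constructor directly with a hypothetical path. row_count_name adds a row-number column via Utils._prepare_row_count_args.

    ldf = Polars::LazyFrame._scan_parquet(
      "data.parquet",            # hypothetical path
      n_rows: 1_000,             # read at most 1,000 rows
      parallel: "auto",
      row_count_name: "row_nr"
    )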
+ # @private
+ def self._scan_ipc(
+ file,
+ n_rows: nil,
+ cache: true,
+ rechunk: true,
+ row_count_name: nil,
+ row_count_offset: 0,
+ storage_options: nil,
+ memory_map: true
+ )
+ if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
+ file = Utils.format_path(file)
+ end
+
+ _from_rbldf(
+ RbLazyFrame.new_from_ipc(
+ file,
+ n_rows,
+ cache,
+ rechunk,
+ Utils._prepare_row_count_args(row_count_name, row_count_offset),
+ memory_map
+ )
+ )
+ end
+
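Because _scan_ipc normalizes both String and Pathname inputs through Utils.format_path, either form works. A sketch with a hypothetical Arrow IPC file:

    require "pathname"

    ldf = Polars::LazyFrame._scan_ipc(
      Pathname.new("data.arrow"),  # hypothetical path; a plain String works too
      memory_map: true
    )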
+ # @private
+ def self._scan_ndjson(
+ file,
+ infer_schema_length: nil,
+ batch_size: nil,
+ n_rows: nil,
+ low_memory: false,
+ rechunk: true,
+ row_count_name: nil,
+ row_count_offset: 0
+ )
+ _from_rbldf(
+ RbLazyFrame.new_from_ndjson(
+ file,
+ infer_schema_length,
+ batch_size,
+ n_rows,
+ low_memory,
+ rechunk,
+ Utils._prepare_row_count_args(row_count_name, row_count_offset)
+ )
+ )
+ end
+
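A sketch for the newline-delimited JSON scan; infer_schema_length and batch_size pass straight through to RbLazyFrame.new_from_ndjson. The file name is hypothetical.

    ldf = Polars::LazyFrame._scan_ndjson(
      "events.ndjson",           # hypothetical path
      infer_schema_length: 200,
      row_count_name: "row_nr"
    )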
+ # def self.from_json
+ # end
+
+ # def self.read_json
+ # end
+
# def columns
# end
# def dtypes
# end
@@ -51,10 +195,11 @@
# end
# def profile
# end
+ #
def collect(
type_coercion: true,
predicate_pushdown: true,
projection_pushdown: true,
simplify_expression: true,
@@ -88,20 +233,22 @@
end
# def fetch
# end
+ #
def lazy
self
end
# def cache
# end
# def cleared
# end
+ #
def filter(predicate)
_from_rbldf(
_ldf.filter(
Utils.expr_to_lit_or_expr(predicate, str_to_lit: false)._rbexpr
)
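filter accepts anything Utils.expr_to_lit_or_expr can turn into an expression, and collect then executes the accumulated plan. A hedged sketch assuming Polars.col and a DataFrame#lazy entry point from the wider gem API:

    ldf = df.lazy.filter(Polars.col("age") > 18)
    adults = ldf.collect(predicate_pushdown: true)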
@@ -126,10 +273,11 @@
# end
# def join_asof
# end
+ #
def join(
other,
left_on: nil,
right_on: nil,
on: nil,
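join combines two lazy frames on key columns. A sketch using only the parameters visible above, assuming string keys are accepted as in upstream Polars; the frames are hypothetical.

    joined = users_ldf.join(orders_ldf, on: "user_id")
    # distinct key names on each side:
    joined = users_ldf.join(orders_ldf, left_on: "id", right_on: "user_id")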
@@ -200,17 +348,19 @@
end
# def with_context
# end
+ #
def with_column(column)
with_columns([column])
end
# def drop
# end
+ #
def rename(mapping)
existing = mapping.keys
_new = mapping.values
_from_rbldf(_ldf.rename(existing, _new))
end
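with_column is shorthand for with_columns with a single expression, and rename takes an old-name => new-name hash. A sketch assuming Polars.col and Expr#alias:

    ldf
      .with_column((Polars.col("price") * 1.1).alias("price_with_tax"))
      .rename({"price" => "base_price"})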
@@ -249,10 +399,11 @@
# end
# def fill_null
# end
+ #
def fill_nan(fill_value)
if !fill_value.is_a?(Expr)
fill_value = Utils.lit(fill_value)
end
_from_rbldf(_ldf.fill_nan(fill_value._rbexpr))
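fill_nan wraps a bare value in Utils.lit before dispatching, so literals and expressions behave the same. A sketch, where the public Polars.lit is assumed to mirror the internal helper:

    ldf.fill_nan(0.0)              # literal, wrapped via Utils.lit
    ldf.fill_nan(Polars.lit(0.0))  # equivalent, passing an Expr directly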
@@ -280,11 +431,14 @@
# end
# def quantile
# end
- # def explode
- # end
+ #
+ def explode(columns)
+ columns = Utils.selection_to_rbexpr_list(columns)
+ _from_rbldf(_ldf.explode(columns))
+ end
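The newly implemented explode accepts whatever Utils.selection_to_rbexpr_list handles, e.g. a single column name or an array of names; the list columns below are hypothetical.

    ldf.explode("tags")
    ldf.explode(["tags", "scores"])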
# def unique
# end
# def drop_nulls