lazy_frame.rb in polars-df-0.1.2

- old
+ new

@@ -1,15 +1,159 @@
 module Polars
+  # Representation of a Lazy computation graph/query againat a DataFrame.
   class LazyFrame
+    # @private
     attr_accessor :_ldf
 
+    # @private
     def self._from_rbldf(rb_ldf)
       ldf = LazyFrame.allocate
       ldf._ldf = rb_ldf
       ldf
     end
 
+    # @private
+    def self._scan_csv(
+      file,
+      has_header: true,
+      sep: ",",
+      comment_char: nil,
+      quote_char: '"',
+      skip_rows: 0,
+      dtypes: nil,
+      null_values: nil,
+      ignore_errors: false,
+      cache: true,
+      with_column_names: nil,
+      infer_schema_length: 100,
+      n_rows: nil,
+      encoding: "utf8",
+      low_memory: false,
+      rechunk: true,
+      skip_rows_after_header: 0,
+      row_count_name: nil,
+      row_count_offset: 0,
+      parse_dates: false,
+      eol_char: "\n"
+    )
+      dtype_list = nil
+      if !dtypes.nil?
+        dtype_list = []
+        dtypes.each do |k, v|
+          dtype_list << [k, Utils.rb_type_to_dtype(v)]
+        end
+      end
+      processed_null_values = Utils._process_null_values(null_values)
+
+      _from_rbldf(
+        RbLazyFrame.new_from_csv(
+          file,
+          sep,
+          has_header,
+          ignore_errors,
+          skip_rows,
+          n_rows,
+          cache,
+          dtype_list,
+          low_memory,
+          comment_char,
+          quote_char,
+          processed_null_values,
+          infer_schema_length,
+          with_column_names,
+          rechunk,
+          skip_rows_after_header,
+          encoding,
+          Utils._prepare_row_count_args(row_count_name, row_count_offset),
+          parse_dates,
+          eol_char
+        )
+      )
+    end
+
+    # @private
+    def self._scan_parquet(
+      file,
+      n_rows: nil,
+      cache: true,
+      parallel: "auto",
+      rechunk: true,
+      row_count_name: nil,
+      row_count_offset: 0,
+      storage_options: nil,
+      low_memory: false
+    )
+      _from_rbldf(
+        RbLazyFrame.new_from_parquet(
+          file,
+          n_rows,
+          cache,
+          parallel,
+          rechunk,
+          Utils._prepare_row_count_args(row_count_name, row_count_offset),
+          low_memory
+        )
+      )
+    end
+
+    # @private
+    def self._scan_ipc(
+      file,
+      n_rows: nil,
+      cache: true,
+      rechunk: true,
+      row_count_name: nil,
+      row_count_offset: 0,
+      storage_options: nil,
+      memory_map: true
+    )
+      if file.is_a?(String) || (defined?(Pathname) && file.is_a?(Pathname))
+        file = Utils.format_path(file)
+      end
+
+      _from_rbldf(
+        RbLazyFrame.new_from_ipc(
+          file,
+          n_rows,
+          cache,
+          rechunk,
+          Utils._prepare_row_count_args(row_count_name, row_count_offset),
+          memory_map
+        )
+      )
+    end
+
+    # @private
+    def self._scan_ndjson(
+      file,
+      infer_schema_length: nil,
+      batch_size: nil,
+      n_rows: nil,
+      low_memory: false,
+      rechunk: true,
+      row_count_name: nil,
+      row_count_offset: 0
+    )
+      _from_rbldf(
+        RbLazyFrame.new_from_ndjson(
+          file,
+          infer_schema_length,
+          batch_size,
+          n_rows,
+          low_memory,
+          rechunk,
+          Utils._prepare_row_count_args(row_count_name, row_count_offset)
+        )
+      )
+    end
+
+    # def self.from_json
+    # end
+
+    # def self.read_json
+    # end
+
     # def columns
     # end
 
     # def dtypes
     # end
@@ -51,10 +195,11 @@
     # end
 
     # def profile
     # end
 
+    #
     def collect(
       type_coercion: true,
       predicate_pushdown: true,
       projection_pushdown: true,
       simplify_expression: true,
@@ -88,20 +233,22 @@
     end
 
     # def fetch
     # end
 
+    #
     def lazy
       self
     end
 
     # def cache
     # end
 
     # def cleared
     # end
 
+    #
     def filter(predicate)
       _from_rbldf(
         _ldf.filter(
           Utils.expr_to_lit_or_expr(predicate, str_to_lit: false)._rbexpr
         )
@@ -126,10 +273,11 @@
     # end
 
     # def join_asof
     # end
 
+    #
     def join(
       other,
       left_on: nil,
       right_on: nil,
       on: nil,
@@ -200,17 +348,19 @@
     end
 
     # def with_context
     # end
 
+    #
     def with_column(column)
       with_columns([column])
     end
 
     # def drop
     # end
 
+    #
     def rename(mapping)
       existing = mapping.keys
       _new = mapping.values
       _from_rbldf(_ldf.rename(existing, _new))
     end
@@ -249,10 +399,11 @@
     # end
 
     # def fill_null
     # end
 
+    #
     def fill_nan(fill_value)
       if !fill_value.is_a?(Expr)
         fill_value = Utils.lit(fill_value)
       end
       _from_rbldf(_ldf.fill_nan(fill_value._rbexpr))
@@ -280,11 +431,14 @@
     # end
 
     # def quantile
     # end
 
-    # def explode
-    # end
+    #
+    def explode(columns)
+      columns = Utils.selection_to_rbexpr_list(columns)
+      _from_rbldf(_ldf.explode(columns))
+    end
 
     # def unique
     # end
 
     # def drop_nulls