module Polars
  # @private
  #
  # Internal helper functions: thin wrappers around the object constructors,
  # temporal conversions between Ruby values and integer timestamps, and
  # small argument-normalisation utilities.
  module Utils
    DTYPE_TEMPORAL_UNITS = ["ns", "us", "ms"]

    # Builds a Series from +s+ via Series._from_rbseries.
    def self.wrap_s(s)
      Series._from_rbseries(s)
    end

    # Builds a DataFrame from +df+ via DataFrame._from_rbdf.
    def self.wrap_df(df)
      DataFrame._from_rbdf(df)
    end

    # Builds a LazyFrame from +ldf+ via LazyFrame._from_rbldf.
    def self.wrap_ldf(ldf)
      LazyFrame._from_rbldf(ldf)
    end

    # Builds an Expr from +rbexpr+ via Expr._from_rbexpr.
    def self.wrap_expr(rbexpr)
      Expr._from_rbexpr(rbexpr)
    end

    # Shortcut for Polars.col.
    def self.col(name)
      Polars.col(name)
    end

    # Currently a pass-through: returns +td+ unchanged.
    def self._timedelta_to_pl_duration(td)
      td
    end

    # Converts a temporal Ruby value to an integer count of +tu+ units since
    # the Unix epoch.
    #
    # The result is computed with integer arithmetic only. Going through a
    # Float cannot represent present-day nanosecond timestamps exactly, and
    # DateTime#utc / DateTime#to_f are not defined by Ruby's standard library.
    #
    # @param dt [Time, DateTime, Date] value to convert; anything that is not
    #   a Time must respond to +to_datetime+ (a Date is therefore taken as
    #   midnight UTC)
    # @param tu [String, nil] "ns", "us" or "ms"; nil means "us"
    # @return [Integer] timestamp in +tu+ units; any fraction finer than the
    #   unit is dropped (floored)
    # @raise [ArgumentError] if +tu+ is not a supported unit
    def self._datetime_to_pl_timestamp(dt, tu)
      units_per_second =
        case tu
        when "ns" then 1_000_000_000
        when "us", nil then 1_000_000
        when "ms" then 1_000
        else
          raise ArgumentError, "tu must be one of {'ns', 'us', 'ms'}, got #{tu}"
        end

      # Date#to_time would yield *local* midnight, so non-Time values are
      # routed through DateTime first, which keeps the zero UTC offset.
      time = dt.is_a?(Time) ? dt : dt.to_datetime.to_time
      time.to_i * units_per_second + time.nsec / (1_000_000_000 / units_per_second)
    end

    # Converts an integer temporal value back into a Ruby object.
    #
    # @param value [Integer] days since the epoch for :date, otherwise a
    #   timestamp expressed in +tu+ units
    # @param dtype [Symbol, String] :date, or a value whose string form starts
    #   with "datetime["
    # @param tu [String] "ns", "us" or "ms"
    # @param tz [String, nil] time zone; only nil / "" is supported so far
    # @return [Date, Time] a Date for :date, otherwise a UTC Time
    # @raise [ArgumentError] if +tu+ is not a supported unit
    # @raise [NotImplementedError] if +dtype+ is not handled
    # @raise [Todo] if a non-empty +tz+ is given
    def self._to_ruby_datetime(value, dtype, tu: "ns", tz: nil)
      if dtype == :date
        # days to seconds
        # important to create from utc. Not doing this leads
        # to inconsistencies dependent on the timezone you are in.
        Time.at(value * 86400).utc.to_date
      # TODO fix dtype
      elsif dtype.to_s.start_with?("datetime[")
        raise Todo unless tz.nil? || tz == ""

        # Integer division and modulo both floor in Ruby, so negative
        # (pre-epoch) values split into a whole second plus a non-negative
        # sub-second part, which is what Time.at expects.
        case tu
        when "ns"
          Time.at(value / 1_000_000_000, value % 1_000_000_000, :nsec).utc
        when "us"
          Time.at(value / 1_000_000, value % 1_000_000, :usec).utc
        when "ms"
          Time.at(value / 1_000, value % 1_000, :millisecond).utc
        else
          raise ArgumentError, "tu must be one of {'ns', 'us', 'ms'}, got #{tu}"
        end
      else
        raise NotImplementedError
      end
    end

    # Normalises a selection to an array of +_rbexpr+ values. A single
    # String, Expr or Series is treated as a one-element list, and strings
    # are interpreted as column names rather than literals.
    def self.selection_to_rbexpr_list(exprs)
      if exprs.is_a?(String) || exprs.is_a?(Expr) || exprs.is_a?(Series)
        exprs = [exprs]
      end

      exprs.map { |e| expr_to_lit_or_expr(e, str_to_lit: false)._rbexpr }
    end

    # Coerces +expr+ into an Expr.
    #
    # @param expr [Expr, Integer, Float, String, Series, nil]
    # @param str_to_lit [Boolean] when false, a String is treated as a column
    #   name (via col) instead of a literal
    # @return the result of col / lit, or +expr+ itself if already an Expr
    # @raise [ArgumentError] for any other kind of value
    def self.expr_to_lit_or_expr(expr, str_to_lit: true)
      return col(expr) if expr.is_a?(String) && !str_to_lit

      case expr
      when Integer, Float, String, Series, nil
        lit(expr)
      when Expr
        expr
      else
        raise ArgumentError, "did not expect value #{expr} of type #{expr.class.name}, maybe disambiguate with Polars.lit or Polars.col"
      end
    end

    # Shortcut for Polars.lit.
    def self.lit(value)
      Polars.lit(value)
    end

    # Expands +path+ to an absolute path with File.expand_path.
    def self.format_path(path)
      File.expand_path(path)
    end

    # TODO fix
    # Currently accepts any Symbol or String as a data type.
    def self.is_polars_dtype(data_type)
      data_type.is_a?(Symbol) || data_type.is_a?(String)
    end

    RB_TYPE_TO_DTYPE = {
      Float => :f64,
      Integer => :i64,
      String => :str,
      TrueClass => :bool,
      FalseClass => :bool,
      Date => :date,
      DateTime => :datetime
    }

    # TODO fix
    # Maps a Ruby class (or an existing dtype Symbol/String) to a dtype name.
    #
    # @return [String]
    # @raise [ArgumentError] if there is no mapping for +data_type+
    def self.rb_type_to_dtype(data_type)
      return data_type.to_s if is_polars_dtype(data_type)

      dtype = RB_TYPE_TO_DTYPE.fetch(data_type) do
        raise ArgumentError, "Conversion of Ruby data type #{data_type} to Polars data type not implemented."
      end
      dtype.to_s
    end

    # Turns a Hash of null values into an array of [key, value] pairs; any
    # other value is returned untouched.
    def self._process_null_values(null_values)
      if null_values.is_a?(Hash)
        null_values.to_a
      else
        null_values
      end
    end

    # Returns [name, offset] when a row count name is given, otherwise nil.
    def self._prepare_row_count_args(row_count_name = nil, row_count_offset = 0)
      [row_count_name, row_count_offset] unless row_count_name.nil?
    end

    # Returns [projection, columns]. Only a falsy +columns+ is supported so
    # far; anything else raises Todo.
    def self.handle_projection_columns(columns)
      projection = nil
      if columns
        raise Todo
        # if columns.is_a?(String) || columns.is_a?(Symbol)
        #   columns = [columns]
        # elsif is_int_sequence(columns)
        #   projection = columns.to_a
        #   columns = nil
        # elsif !is_str_sequence(columns)
        #   raise ArgumentError, "columns arg should contain a list of all integers or all strings values."
        # end
      end
      [projection, columns]
    end

    # Scales a size in bytes to another unit. Only the first character of
    # +to+ is used to select the unit ("b", "k", "m", "g" or "t").
    #
    # @param sz [Numeric] size in bytes
    # @param to [String, Symbol] target unit, e.g. "kb"
    # @return [Numeric] +sz+ itself for bytes, otherwise a Float
    # @raise [ArgumentError] if the unit is not recognised
    def self.scale_bytes(sz, to:)
      unit = to.to_s[0]
      scaling_factor = {
        "b" => 1,
        "k" => 1024,
        "m" => 1024 ** 2,
        "g" => 1024 ** 3,
        "t" => 1024 ** 4
      }.fetch(unit) do
        raise ArgumentError, "unknown size unit #{to.inspect}; expected one starting with 'b', 'k', 'm', 'g' or 't'"
      end

      if scaling_factor > 1
        sz / scaling_factor.to_f
      else
        sz
      end
    end

    # True only for values equal to true or false.
    def self.bool?(value)
      value == true || value == false
    end

    # True when every element of +val+ is an +eltype+.
    def self._is_iterable_of(val, eltype)
      val.all? { |x| x.is_a?(eltype) }
    end

    # True when +val+ is an Array containing only Strings.
    #
    # NOTE(review): +allow_str+ currently has no observable effect, because a
    # String is never an Array and so is rejected on either branch. Confirm
    # the intended behaviour with callers before changing it.
    def self.is_str_sequence(val, allow_str: false)
      if allow_str == false && val.is_a?(String)
        false
      else
        val.is_a?(Array) && _is_iterable_of(val, String)
      end
    end
  end
end