module Polars
  module Selectors
    # Expression wrapper that remembers which selector produced it, so that
    # selectors can be combined with set operators and printed readably.
    #
    # @private
    class SelectorProxy < Expr
      # Operator symbol used by `inspect` for each set-operation selector name.
      SET_OPS = {"and" => "&", "or" => "|", "sub" => "-", "xor" => "^"}.freeze
      private_constant :SET_OPS

      attr_accessor :_attrs
      attr_accessor :_repr_override

      # @param expr [Expr]
      #   Expression whose underlying `_rbexpr` this proxy takes over.
      # @param name [String]
      #   Selector name, used by `inspect`.
      # @param parameters [Hash, nil]
      #   Arguments the selector was built with, used by `inspect`.
      def initialize(
        expr,
        name:,
        parameters: nil
      )
        self._rbexpr = expr._rbexpr
        self._attrs = {
          name: name,
          params: parameters
        }
      end

      # Render the selector as the call (or set expression) that built it.
      #
      # @return [String]
      def inspect
        if !_attrs
          as_expr.inspect
        elsif _repr_override
          _repr_override
        else
          selector_name = _attrs[:name]
          params = _attrs[:params] || {}
          if SET_OPS.include?(selector_name)
            op = SET_OPS[selector_name]
            "(#{params.values.map(&:inspect).join(" #{op} ")})"
          else
            # Keys starting with "*" are rendered as bare values: the first
            # and last character of the value's inspect output are dropped.
            # Every other key is rendered as `key=value`.
            str_params = params.map { |k, v| k.start_with?("*") ? v.inspect[1..-2] : "#{k}=#{v.inspect}" }.join(", ")
            "Polars.cs.#{selector_name}(#{str_params})"
          end
        end
      end

      # Invert the selector (all columns minus this selection); falls back to
      # the plain expression operator when `self` is not a selector.
      def ~
        if Utils.is_selector(self)
          inverted = Selectors.all - self
          # Show the inversion as written instead of `(all - selector)`.
          inverted._repr_override = "~#{inspect}"
        else
          inverted = ~as_expr
        end
        inverted
      end

      # Set difference with another selector; otherwise plain subtraction.
      def -(other)
        if Utils.is_selector(other)
          SelectorProxy.new(
            meta._as_selector.meta._selector_sub(other),
            parameters: {"self" => self, "other" => other},
            name: "sub"
          )
        else
          as_expr - other
        end
      end

      # Set intersection with another selector; otherwise plain `&`.
      def &(other)
        if Utils.is_column(other)
          raise Todo
        end
        if Utils.is_selector(other)
          SelectorProxy.new(
            meta._as_selector.meta._selector_and(other),
            parameters: {"self" => self, "other" => other},
            name: "and"
          )
        else
          as_expr & other
        end
      end

      # Set union with another selector; otherwise plain `|`.
      def |(other)
        if Utils.is_column(other)
          raise Todo
        end
        if Utils.is_selector(other)
          # Previously called `_selector_and`, which made a union behave like
          # an intersection.
          # NOTE(review): assumes the meta namespace exposes `_selector_add`
          # alongside `_selector_and` / `_selector_sub` - confirm.
          SelectorProxy.new(
            meta._as_selector.meta._selector_add(other),
            parameters: {"self" => self, "other" => other},
            name: "or"
          )
        else
          as_expr | other
        end
      end

      # Symmetric difference with another selector; otherwise plain `^`.
      def ^(other)
        if Utils.is_column(other)
          raise Todo
        end
        if Utils.is_selector(other)
          # Previously called `_selector_and`, which made xor behave like an
          # intersection.
          # NOTE(review): assumes the meta namespace exposes `_selector_xor`
          # alongside `_selector_and` / `_selector_sub` - confirm.
          SelectorProxy.new(
            meta._as_selector.meta._selector_xor(other),
            parameters: {"self" => self, "other" => other},
            name: "xor"
          )
        else
          as_expr ^ other
        end
      end

      # Rebuild a plain expression from the underlying `_rbexpr`.
      def as_expr
        Expr._from_rbexpr(_rbexpr)
      end
    end

    # @private
    def self._selector_proxy_(...)
      SelectorProxy.new(...)
    end

    # Build a regex group `(a|b|...)` from a single string or from a list of
    # strings (arrays nested one level deep are flattened).
    #
    # @private
    def self._re_string(string, escape: true)
      if string.is_a?(::String)
        rx = escape ? Utils.re_escape(string) : string
      else
        strings = string.flat_map { |st| st.is_a?(Array) ? st : [st] }
        rx = strings.map { |x| escape ? Utils.re_escape(x) : x }.join("|")
      end
      "(#{rx})"
    end

    # Select all columns.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "dt" => [Date.new(1999, 12, 31), Date.new(2024, 1, 1)],
    #       "value" => [1_234_500, 5_000_555]
    #     },
    #     schema_overrides: {"value" => Polars::Int32}
    #   )
    #
    # @example Select all columns, casting them to string:
    #   df.select(Polars.cs.all.cast(Polars::String))
    #   # =>
    #   # shape: (2, 2)
    #   # ┌────────────┬─────────┐
    #   # │ dt         ┆ value   │
    #   # │ ---        ┆ ---     │
    #   # │ str        ┆ str     │
    #   # ╞════════════╪═════════╡
    #   # │ 1999-12-31 ┆ 1234500 │
    #   # │ 2024-01-01 ┆ 5000555 │
    #   # └────────────┴─────────┘
    #
    # @example Select all columns *except* for those matching the given dtypes:
    #   df.select(Polars.cs.all - Polars.cs.numeric)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌────────────┐
    #   # │ dt         │
    #   # │ ---        │
    #   # │ date       │
    #   # ╞════════════╡
    #   # │ 1999-12-31 │
    #   # │ 2024-01-01 │
    #   # └────────────┘
    def self.all
      _selector_proxy_(F.all, name: "all")
    end

    # Select all columns with alphabetic names (eg: only letters).
    #
    # @param ascii_only [Boolean]
    #   Indicate whether to consider only ASCII alphabetic characters, or the full
    #   Unicode range of valid letters (accented, ideographic, etc).
    # @param ignore_spaces [Boolean]
    #   Indicate whether to ignore the presence of spaces in column names; if so,
    #   only the other (non-space) characters are considered.
    #
    # @return [SelectorProxy]
    #
    # @note
    #   Matching column names cannot contain *any* non-alphabetic characters. Note
    #   that the definition of "alphabetic" consists of all valid Unicode alphabetic
    #   characters (`\p{Alphabetic}`) by default; this can be changed by setting
    #   `ascii_only: true`.
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "no1" => [100, 200, 300],
    #       "café" => ["espresso", "latte", "mocha"],
    #       "t or f" => [true, false, nil],
    #       "hmm" => ["aaa", "bbb", "ccc"],
    #       "都市" => ["東京", "大阪", "京都"]
    #     }
    #   )
    #
    # @example Select columns with alphabetic names; note that accented characters and kanji are recognised as alphabetic here:
    #   df.select(Polars.cs.alpha)
    #   # =>
    #   # shape: (3, 3)
    #   # ┌──────────┬─────┬──────┐
    #   # │ café     ┆ hmm ┆ 都市 │
    #   # │ ---      ┆ --- ┆ ---  │
    #   # │ str      ┆ str ┆ str  │
    #   # ╞══════════╪═════╪══════╡
    #   # │ espresso ┆ aaa ┆ 東京 │
    #   # │ latte    ┆ bbb ┆ 大阪 │
    #   # │ mocha    ┆ ccc ┆ 京都 │
    #   # └──────────┴─────┴──────┘
    #
    # @example Constrain the definition of "alphabetic" to ASCII characters only:
    #   df.select(Polars.cs.alpha(ascii_only: true))
    #   # =>
    #   # shape: (3, 1)
    #   # ┌─────┐
    #   # │ hmm │
    #   # │ --- │
    #   # │ str │
    #   # ╞═════╡
    #   # │ aaa │
    #   # │ bbb │
    #   # │ ccc │
    #   # └─────┘
    #
    # @example
    #   df.select(Polars.cs.alpha(ascii_only: true, ignore_spaces: true))
    #   # =>
    #   # shape: (3, 2)
    #   # ┌────────┬─────┐
    #   # │ t or f ┆ hmm │
    #   # │ ---    ┆ --- │
    #   # │ bool   ┆ str │
    #   # ╞════════╪═════╡
    #   # │ true   ┆ aaa │
    #   # │ false  ┆ bbb │
    #   # │ null   ┆ ccc │
    #   # └────────┴─────┘
    #
    # @example Select all columns *except* for those with alphabetic names:
    #   df.select(~Polars.cs.alpha)
    #   # =>
    #   # shape: (3, 2)
    #   # ┌─────┬────────┐
    #   # │ no1 ┆ t or f │
    #   # │ --- ┆ ---    │
    #   # │ i64 ┆ bool   │
    #   # ╞═════╪════════╡
    #   # │ 100 ┆ true   │
    #   # │ 200 ┆ false  │
    #   # │ 300 ┆ null   │
    #   # └─────┴────────┘
    #
    # @example
    #   df.select(~Polars.cs.alpha(ignore_spaces: true))
    #   # =>
    #   # shape: (3, 1)
    #   # ┌─────┐
    #   # │ no1 │
    #   # │ --- │
    #   # │ i64 │
    #   # ╞═════╡
    #   # │ 100 │
    #   # │ 200 │
    #   # │ 300 │
    #   # └─────┘
    def self.alpha(ascii_only: false, ignore_spaces: false)
      # note that we need to supply a pattern compatible with the *rust* regex crate
      re_alpha = ascii_only ? "a-zA-Z" : "\\p{Alphabetic}"
      re_space = ignore_spaces ? " " : ""
      _selector_proxy_(
        F.col("^[#{re_alpha}#{re_space}]+$"),
        name: "alpha",
        parameters: {"ascii_only" => ascii_only, "ignore_spaces" => ignore_spaces}
      )
    end

    # TODO
    # def alphanumeric
    # end

    # Select all binary columns.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new({"a" => ["hello".b], "b" => ["world"], "c" => ["!".b], "d" => [":)"]})
    #   # =>
    #   # shape: (1, 4)
    #   # ┌──────────┬───────┬────────┬─────┐
    #   # │ a        ┆ b     ┆ c      ┆ d   │
    #   # │ ---      ┆ ---   ┆ ---    ┆ --- │
    #   # │ binary   ┆ str   ┆ binary ┆ str │
    #   # ╞══════════╪═══════╪════════╪═════╡
    #   # │ b"hello" ┆ world ┆ b"!"   ┆ :)  │
    #   # └──────────┴───────┴────────┴─────┘
    #
    # @example Select binary columns and export as a dict:
    #   df.select(Polars.cs.binary).to_h(as_series: false)
    #   # => {"a"=>["hello"], "c"=>["!"]}
    #
    # @example Select all columns *except* for those that are binary:
    #   df.select(~Polars.cs.binary).to_h(as_series: false)
    #   # => {"b"=>["world"], "d"=>[":)"]}
    def self.binary
      _selector_proxy_(F.col(Binary), name: "binary")
    end

    # Select all boolean columns.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new({"n" => 1..4}).with_columns(n_even: Polars.col("n") % 2 == 0)
    #   # =>
    #   # shape: (4, 2)
    #   # ┌─────┬────────┐
    #   # │ n   ┆ n_even │
    #   # │ --- ┆ ---    │
    #   # │ i64 ┆ bool   │
    #   # ╞═════╪════════╡
    #   # │ 1   ┆ false  │
    #   # │ 2   ┆ true   │
    #   # │ 3   ┆ false  │
    #   # │ 4   ┆ true   │
    #   # └─────┴────────┘
    #
    # @example Select and invert boolean columns:
    #   df.with_columns(is_odd: Polars.cs.boolean.not_)
    #   # =>
    #   # shape: (4, 3)
    #   # ┌─────┬────────┬────────┐
    #   # │ n   ┆ n_even ┆ is_odd │
    #   # │ --- ┆ ---    ┆ ---    │
    #   # │ i64 ┆ bool   ┆ bool   │
    #   # ╞═════╪════════╪════════╡
    #   # │ 1   ┆ false  ┆ true   │
    #   # │ 2   ┆ true   ┆ false  │
    #   # │ 3   ┆ false  ┆ true   │
    #   # │ 4   ┆ true   ┆ false  │
    #   # └─────┴────────┴────────┘
    #
    # @example Select all columns *except* for those that are boolean:
    #   df.select(~Polars.cs.boolean)
    #   # =>
    #   # shape: (4, 1)
    #   # ┌─────┐
    #   # │ n   │
    #   # │ --- │
    #   # │ i64 │
    #   # ╞═════╡
    #   # │ 1   │
    #   # │ 2   │
    #   # │ 3   │
    #   # │ 4   │
    #   # └─────┘
    def self.boolean
      _selector_proxy_(F.col(Boolean), name: "boolean")
    end

    # TODO
    # def by_dtype
    # end

    # TODO
    # def by_index
    # end

    # TODO
    # def by_name
    # end

    # Select all categorical columns.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "foo" => ["xx", "yy"],
    #       "bar" => [123, 456],
    #       "baz" => [2.0, 5.5]
    #     },
    #     schema_overrides: {"foo" => Polars::Categorical}
    #   )
    #
    # @example Select all categorical columns:
    #   df.select(Polars.cs.categorical)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ foo │
    #   # │ --- │
    #   # │ cat │
    #   # ╞═════╡
    #   # │ xx  │
    #   # │ yy  │
    #   # └─────┘
    #
    # @example Select all columns *except* for those that are categorical:
    #   df.select(~Polars.cs.categorical)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ bar ┆ baz │
    #   # │ --- ┆ --- │
    #   # │ i64 ┆ f64 │
    #   # ╞═════╪═════╡
    #   # │ 123 ┆ 2.0 │
    #   # │ 456 ┆ 5.5 │
    #   # └─────┴─────┘
    def self.categorical
      _selector_proxy_(F.col(Categorical), name: "categorical")
    end

    # Select columns whose names contain the given literal substring(s).
    #
    # @param substring [Object]
    #   Substring(s) that matching column names should contain.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "foo" => ["x", "y"],
    #       "bar" => [123, 456],
    #       "baz" => [2.0, 5.5],
    #       "zap" => [false, true]
    #     }
    #   )
    #
    # @example Select columns that contain the substring 'ba':
    #   df.select(Polars.cs.contains("ba"))
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ bar ┆ baz │
    #   # │ --- ┆ --- │
    #   # │ i64 ┆ f64 │
    #   # ╞═════╪═════╡
    #   # │ 123 ┆ 2.0 │
    #   # │ 456 ┆ 5.5 │
    #   # └─────┴─────┘
    #
    # @example Select columns that contain the substring 'ba' or the letter 'z':
    #   df.select(Polars.cs.contains("ba", "z"))
    #   # =>
    #   # shape: (2, 3)
    #   # ┌─────┬─────┬───────┐
    #   # │ bar ┆ baz ┆ zap   │
    #   # │ --- ┆ --- ┆ ---   │
    #   # │ i64 ┆ f64 ┆ bool  │
    #   # ╞═════╪═════╪═══════╡
    #   # │ 123 ┆ 2.0 ┆ false │
    #   # │ 456 ┆ 5.5 ┆ true  │
    #   # └─────┴─────┴───────┘
    #
    # @example Select all columns *except* for those that contain the substring 'ba':
    #   df.select(~Polars.cs.contains("ba"))
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬───────┐
    #   # │ foo ┆ zap   │
    #   # │ --- ┆ ---   │
    #   # │ str ┆ bool  │
    #   # ╞═════╪═══════╡
    #   # │ x   ┆ false │
    #   # │ y   ┆ true  │
    #   # └─────┴───────┘
    def self.contains(*substring)
      escaped_substring = _re_string(substring)
      raw_params = "^.*#{escaped_substring}.*$"

      # Record the arguments as given (not the escaped regex group) so that
      # `inspect` prints them the way the caller wrote them, as `starts_with`
      # already does.
      _selector_proxy_(
        F.col(raw_params),
        name: "contains",
        parameters: {"*substring" => substring}
      )
    end

    # Select all date columns.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "dtm" => [DateTime.new(2001, 5, 7, 10, 25), DateTime.new(2031, 12, 31, 0, 30)],
    #       "dt" => [Date.new(1999, 12, 31), Date.new(2024, 8, 9)]
    #     }
    #   )
    #
    # @example Select all date columns:
    #   df.select(Polars.cs.date)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌────────────┐
    #   # │ dt         │
    #   # │ ---        │
    #   # │ date       │
    #   # ╞════════════╡
    #   # │ 1999-12-31 │
    #   # │ 2024-08-09 │
    #   # └────────────┘
    #
    # @example Select all columns *except* for those that are dates:
    #   df.select(~Polars.cs.date)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────────────────────┐
    #   # │ dtm                 │
    #   # │ ---                 │
    #   # │ datetime[ns]        │
    #   # ╞═════════════════════╡
    #   # │ 2001-05-07 10:25:00 │
    #   # │ 2031-12-31 00:30:00 │
    #   # └─────────────────────┘
    def self.date
      _selector_proxy_(F.col(Date), name: "date")
    end

    # TODO
    # def datetime
    # end

    # Select all decimal columns.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "foo" => ["x", "y"],
    #       "bar" => [BigDecimal("123"), BigDecimal("456")],
    #       "baz" => [BigDecimal("2.0005"), BigDecimal("-50.5555")],
    #     },
    #     schema_overrides: {"baz" => Polars::Decimal.new(10, 5)}
    #   )
    #
    # @example Select all decimal columns:
    #   df.select(Polars.cs.decimal)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌──────────────┬───────────────┐
    #   # │ bar          ┆ baz           │
    #   # │ ---          ┆ ---           │
    #   # │ decimal[*,0] ┆ decimal[10,5] │
    #   # ╞══════════════╪═══════════════╡
    #   # │ 123          ┆ 2.00050       │
    #   # │ 456          ┆ -50.55550     │
    #   # └──────────────┴───────────────┘
    #
    # @example Select all columns *except* the decimal ones:
    #   df.select(~Polars.cs.decimal)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ foo │
    #   # │ --- │
    #   # │ str │
    #   # ╞═════╡
    #   # │ x   │
    #   # │ y   │
    #   # └─────┘
    def self.decimal
      # TODO: allow explicit selection by scale/precision?
      _selector_proxy_(F.col(Decimal), name: "decimal")
    end

    # Select columns that end with the given substring(s).
    #
    # @param suffix [Object]
    #   Substring(s) that matching column names should end with.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "foo" => ["x", "y"],
    #       "bar" => [123, 456],
    #       "baz" => [2.0, 5.5],
    #       "zap" => [false, true]
    #     }
    #   )
    #
    # @example Select columns that end with the substring 'z':
    #   df.select(Polars.cs.ends_with("z"))
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ baz │
    #   # │ --- │
    #   # │ f64 │
    #   # ╞═════╡
    #   # │ 2.0 │
    #   # │ 5.5 │
    #   # └─────┘
    #
    # @example Select columns that end with *either* the letter 'z' or 'r':
    #   df.select(Polars.cs.ends_with("z", "r"))
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ bar ┆ baz │
    #   # │ --- ┆ --- │
    #   # │ i64 ┆ f64 │
    #   # ╞═════╪═════╡
    #   # │ 123 ┆ 2.0 │
    #   # │ 456 ┆ 5.5 │
    #   # └─────┴─────┘
    #
    # @example Select all columns *except* for those that end with the substring 'z':
    #   df.select(~Polars.cs.ends_with("z"))
    #   # =>
    #   # shape: (2, 3)
    #   # ┌─────┬─────┬───────┐
    #   # │ foo ┆ bar ┆ zap   │
    #   # │ --- ┆ --- ┆ ---   │
    #   # │ str ┆ i64 ┆ bool  │
    #   # ╞═════╪═════╪═══════╡
    #   # │ x   ┆ 123 ┆ false │
    #   # │ y   ┆ 456 ┆ true  │
    #   # └─────┴─────┴───────┘
    def self.ends_with(*suffix)
      escaped_suffix = _re_string(suffix)
      raw_params = "^.*#{escaped_suffix}$"

      # Record the arguments as given (not the escaped regex group) so that
      # `inspect` prints them the way the caller wrote them, as `starts_with`
      # already does.
      _selector_proxy_(
        F.col(raw_params),
        name: "ends_with",
        parameters: {"*suffix" => suffix}
      )
    end

    # Select the first column in the current scope.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "foo" => ["x", "y"],
    #       "bar" => [123, 456],
    #       "baz" => [2.0, 5.5],
    #       "zap" => [0, 1]
    #     }
    #   )
    #
    # @example Select the first column:
    #   df.select(Polars.cs.first)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ foo │
    #   # │ --- │
    #   # │ str │
    #   # ╞═════╡
    #   # │ x   │
    #   # │ y   │
    #   # └─────┘
    #
    # @example Select everything *except* for the first column:
    #   df.select(~Polars.cs.first)
    #   # =>
    #   # shape: (2, 3)
    #   # ┌─────┬─────┬─────┐
    #   # │ bar ┆ baz ┆ zap │
    #   # │ --- ┆ --- ┆ --- │
    #   # │ i64 ┆ f64 ┆ i64 │
    #   # ╞═════╪═════╪═════╡
    #   # │ 123 ┆ 2.0 ┆ 0   │
    #   # │ 456 ┆ 5.5 ┆ 1   │
    #   # └─────┴─────┴─────┘
    def self.first
      _selector_proxy_(F.first, name: "first")
    end

    # Select all float columns.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "foo" => ["x", "y"],
    #       "bar" => [123, 456],
    #       "baz" => [2.0, 5.5],
    #       "zap" => [0.0, 1.0]
    #     },
    #     schema_overrides: {"baz" => Polars::Float32, "zap" => Polars::Float64}
    #   )
    #
    # @example Select all float columns:
    #   df.select(Polars.cs.float)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ baz ┆ zap │
    #   # │ --- ┆ --- │
    #   # │ f32 ┆ f64 │
    #   # ╞═════╪═════╡
    #   # │ 2.0 ┆ 0.0 │
    #   # │ 5.5 ┆ 1.0 │
    #   # └─────┴─────┘
    #
    # @example Select all columns *except* for those that are float:
    #   df.select(~Polars.cs.float)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ foo ┆ bar │
    #   # │ --- ┆ --- │
    #   # │ str ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ x   ┆ 123 │
    #   # │ y   ┆ 456 │
    #   # └─────┴─────┘
    def self.float
      _selector_proxy_(F.col(FLOAT_DTYPES), name: "float")
    end

    # Select all integer columns.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "foo" => ["x", "y"],
    #       "bar" => [123, 456],
    #       "baz" => [2.0, 5.5],
    #       "zap" => [0, 1]
    #     }
    #   )
    #
    # @example Select all integer columns:
    #   df.select(Polars.cs.integer)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ bar ┆ zap │
    #   # │ --- ┆ --- │
    #   # │ i64 ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ 123 ┆ 0   │
    #   # │ 456 ┆ 1   │
    #   # └─────┴─────┘
    #
    # @example Select all columns *except* for those that are integer:
    #   df.select(~Polars.cs.integer)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ foo ┆ baz │
    #   # │ --- ┆ --- │
    #   # │ str ┆ f64 │
    #   # ╞═════╪═════╡
    #   # │ x   ┆ 2.0 │
    #   # │ y   ┆ 5.5 │
    #   # └─────┴─────┘
    def self.integer
      _selector_proxy_(F.col(INTEGER_DTYPES), name: "integer")
    end

    # Select all signed integer columns.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "foo" => [-123, -456],
    #       "bar" => [3456, 6789],
    #       "baz" => [7654, 4321],
    #       "zap" => ["ab", "cd"]
    #     },
    #     schema_overrides: {"bar" => Polars::UInt32, "baz" => Polars::UInt64}
    #   )
    #
    # @example Select all signed integer columns:
    #   df.select(Polars.cs.signed_integer)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌──────┐
    #   # │ foo  │
    #   # │ ---  │
    #   # │ i64  │
    #   # ╞══════╡
    #   # │ -123 │
    #   # │ -456 │
    #   # └──────┘
    #
    # @example
    #   df.select(~Polars.cs.signed_integer)
    #   # =>
    #   # shape: (2, 3)
    #   # ┌──────┬──────┬─────┐
    #   # │ bar  ┆ baz  ┆ zap │
    #   # │ ---  ┆ ---  ┆ --- │
    #   # │ u32  ┆ u64  ┆ str │
    #   # ╞══════╪══════╪═════╡
    #   # │ 3456 ┆ 7654 ┆ ab  │
    #   # │ 6789 ┆ 4321 ┆ cd  │
    #   # └──────┴──────┴─────┘
    #
    # @example Select all integer columns (both signed and unsigned):
    #   df.select(Polars.cs.integer)
    #   # =>
    #   # shape: (2, 3)
    #   # ┌──────┬──────┬──────┐
    #   # │ foo  ┆ bar  ┆ baz  │
    #   # │ ---  ┆ ---  ┆ ---  │
    #   # │ i64  ┆ u32  ┆ u64  │
    #   # ╞══════╪══════╪══════╡
    #   # │ -123 ┆ 3456 ┆ 7654 │
    #   # │ -456 ┆ 6789 ┆ 4321 │
    #   # └──────┴──────┴──────┘
    def self.signed_integer
      _selector_proxy_(F.col(SIGNED_INTEGER_DTYPES), name: "signed_integer")
    end

    # Select all unsigned integer columns.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "foo" => [-123, -456],
    #       "bar" => [3456, 6789],
    #       "baz" => [7654, 4321],
    #       "zap" => ["ab", "cd"]
    #     },
    #     schema_overrides: {"bar" => Polars::UInt32, "baz" => Polars::UInt64}
    #   )
    #
    # @example Select all unsigned integer columns:
    #   df.select(Polars.cs.unsigned_integer)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌──────┬──────┐
    #   # │ bar  ┆ baz  │
    #   # │ ---  ┆ ---  │
    #   # │ u32  ┆ u64  │
    #   # ╞══════╪══════╡
    #   # │ 3456 ┆ 7654 │
    #   # │ 6789 ┆ 4321 │
    #   # └──────┴──────┘
    #
    # @example Select all columns *except* for those that are unsigned integers:
    #   df.select(~Polars.cs.unsigned_integer)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌──────┬─────┐
    #   # │ foo  ┆ zap │
    #   # │ ---  ┆ --- │
    #   # │ i64  ┆ str │
    #   # ╞══════╪═════╡
    #   # │ -123 ┆ ab  │
    #   # │ -456 ┆ cd  │
    #   # └──────┴─────┘
    #
    # @example Select all integer columns (both signed and unsigned):
    #   df.select(Polars.cs.integer)
    #   # =>
    #   # shape: (2, 3)
    #   # ┌──────┬──────┬──────┐
    #   # │ foo  ┆ bar  ┆ baz  │
    #   # │ ---  ┆ ---  ┆ ---  │
    #   # │ i64  ┆ u32  ┆ u64  │
    #   # ╞══════╪══════╪══════╡
    #   # │ -123 ┆ 3456 ┆ 7654 │
    #   # │ -456 ┆ 6789 ┆ 4321 │
    #   # └──────┴──────┴──────┘
    def self.unsigned_integer
      _selector_proxy_(F.col(UNSIGNED_INTEGER_DTYPES), name: "unsigned_integer")
    end

    # Select the last column in the current scope.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "foo" => ["x", "y"],
    #       "bar" => [123, 456],
    #       "baz" => [2.0, 5.5],
    #       "zap" => [0, 1]
    #     }
    #   )
    #
    # @example Select the last column:
    #   df.select(Polars.cs.last)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ zap │
    #   # │ --- │
    #   # │ i64 │
    #   # ╞═════╡
    #   # │ 0   │
    #   # │ 1   │
    #   # └─────┘
    #
    # @example Select everything *except* for the last column:
    #   df.select(~Polars.cs.last)
    #   # =>
    #   # shape: (2, 3)
    #   # ┌─────┬─────┬─────┐
    #   # │ foo ┆ bar ┆ baz │
    #   # │ --- ┆ --- ┆ --- │
    #   # │ str ┆ i64 ┆ f64 │
    #   # ╞═════╪═════╪═════╡
    #   # │ x   ┆ 123 ┆ 2.0 │
    #   # │ y   ┆ 456 ┆ 5.5 │
    #   # └─────┴─────┴─────┘
    def self.last
      _selector_proxy_(F.last, name: "last")
    end

    # Select all numeric columns.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "foo" => ["x", "y"],
    #       "bar" => [123, 456],
    #       "baz" => [2.0, 5.5],
    #       "zap" => [0, 0]
    #     },
    #     schema_overrides: {"bar" => Polars::Int16, "baz" => Polars::Float32, "zap" => Polars::UInt8},
    #   )
    #
    # @example Match all numeric columns:
    #   df.select(Polars.cs.numeric)
    #   # =>
    #   # shape: (2, 3)
    #   # ┌─────┬─────┬─────┐
    #   # │ bar ┆ baz ┆ zap │
    #   # │ --- ┆ --- ┆ --- │
    #   # │ i16 ┆ f32 ┆ u8  │
    #   # ╞═════╪═════╪═════╡
    #   # │ 123 ┆ 2.0 ┆ 0   │
    #   # │ 456 ┆ 5.5 ┆ 0   │
    #   # └─────┴─────┴─────┘
    #
    # @example Match all columns *except* for those that are numeric:
    #   df.select(~Polars.cs.numeric)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌─────┐
    #   # │ foo │
    #   # │ --- │
    #   # │ str │
    #   # ╞═════╡
    #   # │ x   │
    #   # │ y   │
    #   # └─────┘
    def self.numeric
      _selector_proxy_(F.col(NUMERIC_DTYPES), name: "numeric")
    end

    # Select columns that start with the given substring(s).
    #
    # @param prefix [Object]
    #   Substring(s) that matching column names should start with.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "foo" => [1.0, 2.0],
    #       "bar" => [3.0, 4.0],
    #       "baz" => [5, 6],
    #       "zap" => [7, 8]
    #     }
    #   )
    #
    # @example Match columns starting with a 'b':
    #   df.select(Polars.cs.starts_with("b"))
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ bar ┆ baz │
    #   # │ --- ┆ --- │
    #   # │ f64 ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ 3.0 ┆ 5   │
    #   # │ 4.0 ┆ 6   │
    #   # └─────┴─────┘
    #
    # @example Match columns starting with *either* the letter 'b' or 'z':
    #   df.select(Polars.cs.starts_with("b", "z"))
    #   # =>
    #   # shape: (2, 3)
    #   # ┌─────┬─────┬─────┐
    #   # │ bar ┆ baz ┆ zap │
    #   # │ --- ┆ --- ┆ --- │
    #   # │ f64 ┆ i64 ┆ i64 │
    #   # ╞═════╪═════╪═════╡
    #   # │ 3.0 ┆ 5   ┆ 7   │
    #   # │ 4.0 ┆ 6   ┆ 8   │
    #   # └─────┴─────┴─────┘
    #
    # @example Match all columns *except* for those starting with 'b':
    #   df.select(~Polars.cs.starts_with("b"))
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────┬─────┐
    #   # │ foo ┆ zap │
    #   # │ --- ┆ --- │
    #   # │ f64 ┆ i64 │
    #   # ╞═════╪═════╡
    #   # │ 1.0 ┆ 7   │
    #   # │ 2.0 ┆ 8   │
    #   # └─────┴─────┘
    def self.starts_with(*prefix)
      escaped_prefix = _re_string(prefix)
      raw_params = "^#{escaped_prefix}.*$"

      _selector_proxy_(
        F.col(raw_params),
        name: "starts_with",
        parameters: {"*prefix" => prefix}
      )
    end

    # Select all String (and, optionally, Categorical) string columns.
    #
    # @param include_categorical [Boolean]
    #   Also select categorical columns.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "w" => ["xx", "yy", "xx", "yy", "xx"],
    #       "x" => [1, 2, 1, 4, -2],
    #       "y" => [3.0, 4.5, 1.0, 2.5, -2.0],
    #       "z" => ["a", "b", "a", "b", "b"]
    #     },
    #   ).with_columns(
    #     z: Polars.col("z").cast(Polars::Categorical.new("lexical")),
    #   )
    #
    # @example Group by all string columns, sum the numeric columns, then sort by the string cols:
    #   df.group_by(Polars.cs.string).agg(Polars.cs.numeric.sum).sort(Polars.cs.string)
    #   # =>
    #   # shape: (2, 3)
    #   # ┌─────┬─────┬─────┐
    #   # │ w   ┆ x   ┆ y   │
    #   # │ --- ┆ --- ┆ --- │
    #   # │ str ┆ i64 ┆ f64 │
    #   # ╞═════╪═════╪═════╡
    #   # │ xx  ┆ 0   ┆ 2.0 │
    #   # │ yy  ┆ 6   ┆ 7.0 │
    #   # └─────┴─────┴─────┘
    #
    # @example Group by all string *and* categorical columns:
    #   df.group_by(Polars.cs.string(include_categorical: true)).agg(Polars.cs.numeric.sum).sort(
    #     Polars.cs.string(include_categorical: true)
    #   )
    #   # =>
    #   # shape: (3, 4)
    #   # ┌─────┬─────┬─────┬──────┐
    #   # │ w   ┆ z   ┆ x   ┆ y    │
    #   # │ --- ┆ --- ┆ --- ┆ ---  │
    #   # │ str ┆ cat ┆ i64 ┆ f64  │
    #   # ╞═════╪═════╪═════╪══════╡
    #   # │ xx  ┆ a   ┆ 2   ┆ 4.0  │
    #   # │ xx  ┆ b   ┆ -2  ┆ -2.0 │
    #   # │ yy  ┆ b   ┆ 6   ┆ 7.0  │
    #   # └─────┴─────┴─────┴──────┘
    def self.string(include_categorical: false)
      string_dtypes = [String]
      if include_categorical
        string_dtypes << Categorical
      end

      _selector_proxy_(
        F.col(string_dtypes),
        name: "string",
        parameters: {"include_categorical" => include_categorical}
      )
    end

    # Select all time columns.
    #
    # @return [SelectorProxy]
    #
    # @example
    #   df = Polars::DataFrame.new(
    #     {
    #       "dtm" => [DateTime.new(2001, 5, 7, 10, 25), DateTime.new(2031, 12, 31, 0, 30)],
    #       "dt" => [Date.new(1999, 12, 31), Date.new(2024, 8, 9)],
    #       "tm" => [Time.utc(2001, 1, 1, 0, 0, 0), Time.utc(2001, 1, 1, 23, 59, 59)]
    #     },
    #     schema_overrides: {"tm" => Polars::Time}
    #   )
    #
    # @example Select all time columns:
    #   df.select(Polars.cs.time)
    #   # =>
    #   # shape: (2, 1)
    #   # ┌──────────┐
    #   # │ tm       │
    #   # │ ---      │
    #   # │ time     │
    #   # ╞══════════╡
    #   # │ 00:00:00 │
    #   # │ 23:59:59 │
    #   # └──────────┘
    #
    # @example Select all columns *except* for those that are times:
    #   df.select(~Polars.cs.time)
    #   # =>
    #   # shape: (2, 2)
    #   # ┌─────────────────────┬────────────┐
    #   # │ dtm                 ┆ dt         │
    #   # │ ---                 ┆ ---        │
    #   # │ datetime[ns]        ┆ date       │
    #   # ╞═════════════════════╪════════════╡
    #   # │ 2001-05-07 10:25:00 ┆ 1999-12-31 │
    #   # │ 2031-12-31 00:30:00 ┆ 2024-08-09 │
    #   # └─────────────────────┴────────────┘
    def self.time
      _selector_proxy_(F.col(Time), name: "time")
    end
  end

  # Shorthand accessor for the selectors namespace (`Polars.cs.numeric`, ...).
  def self.cs
    Polars::Selectors
  end
end