# frozen_string_literal: true module RedAmber # data frame class # @table : holds Arrow::Table object class DataFrame # mix-in include DataFrameDisplayable include DataFrameHelper include DataFrameSelectable include DataFrameObservationOperation include DataFrameVariableOperation def initialize(*args) # DataFrame.new, DataFrame.new([]), DataFrame.new({}), DataFrame.new(nil) # returns empty DataFrame @table = Arrow::Table.new({}, []) # bug in gobject-introspection: ruby-gnome/ruby-gnome#1472 # [Arrow::Table] == [nil] shows ArgumentError # temporary use yoda condition to workaround return if args.empty? || args == [[]] || args == [{}] || [nil] == args if args.size > 1 @table = Arrow::Table.new(*args) else arg = args[0] @table = case arg when Arrow::Table then arg when DataFrame then arg.table when Rover::DataFrame then Arrow::Table.new(arg.to_h) when Hash then Arrow::Table.new(arg) else raise DataFrameTypeError, "invalid argument: #{arg}" end end end def self.load(path, options = {}) DataFrame.new(Arrow::Table.load(path, options)) end attr_reader :table def save(output, options = {}) @table.save(output, options) end # Properties === def size @table.n_rows end alias_method :n_rows, :size alias_method :n_obs, :size def n_keys @table.n_columns end alias_method :n_cols, :n_keys alias_method :n_vars, :n_keys def shape [size, n_keys] end def keys @table.columns.map { |column| column.name.to_sym } end alias_method :column_names, :keys alias_method :var_names, :keys def key?(key) keys.include?(key.to_sym) end alias_method :has_key?, :key? def key_index(key) keys.find_index(key.to_sym) end alias_method :find_index, :key_index alias_method :index, :key_index def types @table.columns.map do |column| column.data.value_type.nick.to_sym end end def data_types @table.columns.map do |column| column.data_type.class end end def vectors @table.columns.map do |column| Vector.new(column.data) end end def indexes 0...size end alias_method :indices, :indexes def to_h @table.columns.each_with_object({}) do |column, result| result[column.name.to_sym] = column.entries end end def to_a # output an array of row-oriented data without header # if you need column-oriented array, use `.to_h.to_a` @table.raw_records end alias_method :raw_records, :to_a def schema keys.zip(types).to_h end def ==(other) other.is_a?(DataFrame) && @table == other.table end def empty? @table.columns.empty? end def to_rover Rover::DataFrame.new(to_h) end # def to_parquet() end end end