lib/arrow/table.rb in red-arrow-0.14.1 vs lib/arrow/table.rb in red-arrow-0.15.0

- old
+ new

@@ -13,15 +13,17 @@ # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +require "arrow/column-containable" require "arrow/group" require "arrow/record-containable" module Arrow class Table + include ColumnContainable include RecordContainable class << self def load(path, options={}) TableLoader.load(path, options) @@ -72,10 +74,28 @@ # Arrow::BooleanArray.new([nil, nil, false]), # ] # Arrow::Table.new("count" => Arrow::ChunkedArray.new(count_chunks), # "visible" => Arrow::ChunkedArray.new(visible_chunks)) # + # @overload initialize(raw_table) + # + # @param raw_table [Hash<String, ::Array>] + # The pairs of column name and values of the table. Column values is + # `Array`. + # + # @example Create a table from column name and values + # count_chunks = [ + # Arrow::UInt32Array.new([0, 2]), + # Arrow::UInt32Array.new([nil, 4]), + # ] + # visible_chunks = [ + # Arrow::BooleanArray.new([true]), + # Arrow::BooleanArray.new([nil, nil, false]), + # ] + # Arrow::Table.new("count" => [0, 2, nil, 4], + # "visible" => [true, nil, nil, false]) + # # @overload initialize(schema, columns) # # @param schema [Arrow::Schema] The schema of the table. # You can also specify schema as primitive Ruby objects. # See {Arrow::Schema#initialize} for details. @@ -150,42 +170,42 @@ def initialize(*args) n_args = args.size case n_args when 1 if args[0][0].is_a?(Column) - values = args[0] - fields = values.collect(&:field) + columns = args[0] + fields = columns.collect(&:field) + values = columns.collect(&:data) schema = Schema.new(fields) else raw_table = args[0] fields = [] values = [] raw_table.each do |name, array| - field = Field.new(name.to_s, array.value_data_type) - fields << field - values << Column.new(field, array) + array = ArrayBuilder.build(array) if array.is_a?(::Array) + fields << Field.new(name.to_s, array.value_data_type) + values << array end schema = Schema.new(fields) end when 2 schema = args[0] schema = Schema.new(schema) unless schema.is_a?(Schema) values = args[1] - if values[0].is_a?(::Array) + case values[0] + when ::Array values = [RecordBatch.new(schema, values)] + when Column + values = values.collect(&:data) end else - message = "wrong number of arguments (given, #{n_args}, expected 1..2)" + message = "wrong number of arguments (given #{n_args}, expected 1..2)" raise ArgumentError, message end initialize_raw(schema, values) end - def columns - @columns ||= n_columns.times.collect {|i| get_column(i)} - end - def each_record_batch return to_enum(__method__) unless block_given? reader = TableBatchReader.new(self) while record_batch = reader.read_next @@ -336,19 +356,20 @@ case other when Hash other.each do |name, value| name = name.to_s if value - added_columns[name] = ensure_column(name, value) + added_columns[name] = ensure_raw_column(name, value) else removed_columns[name] = true end end when Table added_columns = {} other.columns.each do |column| - added_columns[column.name] = column + name = column.name + added_columns[name] = ensure_raw_column(name, column) end else message = "merge target must be Hash or Arrow::Table: " + "<#{other.inspect}>: #{inspect}" raise ArgumentError, message @@ -361,19 +382,22 @@ if new_column new_columns << new_column next end next if removed_columns.key?(column_name) - new_columns << column + new_columns << ensure_raw_column(column_name, column) end added_columns.each do |name, new_column| new_columns << new_column end - new_fields = new_columns.collect do |new_column| - new_column.field + new_fields = [] + new_arrays = [] + new_columns.each do |new_column| + new_fields << new_column[:field] + new_arrays << new_column[:data] end - self.class.new(Schema.new(new_fields), new_columns) + self.class.new(new_fields, new_arrays) end alias_method :remove_column_raw, :remove_column def remove_column(name_or_index) case name_or_index @@ -445,14 +469,14 @@ saver = TableSaver.new(self, path, options) saver.save end def pack - packed_columns = columns.collect do |column| - column.pack + packed_arrays = columns.collect do |column| + column.data.pack end - self.class.new(schema, packed_columns) + self.class.new(schema, packed_arrays) end alias_method :to_s_raw, :to_s def to_s(options={}) format = options[:format] @@ -522,16 +546,29 @@ else sliced_table[0] end end - def ensure_column(name, data) + def ensure_raw_column(name, data) case data when Array - field = Field.new(name, data.value_data_type) - Column.new(field, data) + { + field: Field.new(name, data.value_data_type), + data: ChunkedArray.new([data]), + } + when ChunkedArray + { + field: Field.new(name, data.value_data_type), + data: data, + } when Column - data + column = data + data = column.data + data = ChunkedArray.new([data]) unless data.is_a?(ChunkedArray) + { + field: column.field, + data: data, + } else message = "column must be Arrow::Array or Arrow::Column: " + "<#{name}>: <#{data.inspect}>: #{inspect}" raise ArgumentError, message end