lib/arrow/table.rb in red-arrow-10.0.1 vs lib/arrow/table.rb in red-arrow-11.0.0

- old
+ new

@@ -20,10 +20,11 @@ module Arrow class Table include ColumnContainable include GenericFilterable include GenericTakeable + include InputReferable include RecordContainable class << self def load(path, options={}) TableLoader.load(path, options) @@ -186,10 +187,11 @@ def each_record_batch return to_enum(__method__) unless block_given? reader = TableBatchReader.new(self) while record_batch = reader.read_next + share_input(record_batch) yield(record_batch) end end alias_method :size, :n_rows @@ -344,14 +346,16 @@ "Arrow::BooleanArray or Arrow::Slicer::Condition: #{slicer.inspect}" raise ArgumentError, message end end if sliced_tables.size > 1 - sliced_tables[0].concatenate(sliced_tables[1..-1]) + sliced_table = sliced_tables[0].concatenate(sliced_tables[1..-1]) else - sliced_tables[0] + sliced_table = sliced_tables[0] end + share_input(sliced_table) + sliced_table end # TODO # # @return [Arrow::Table] @@ -399,11 +403,13 @@ new_arrays = [] new_columns.each do |new_column| new_fields << new_column[:field] new_arrays << new_column[:data] end - self.class.new(new_fields, new_arrays) + table = self.class.new(new_fields, new_arrays) + share_input(table) + table end alias_method :remove_column_raw, :remove_column def remove_column(name_or_index) case name_or_index @@ -421,11 +427,13 @@ message = "out of index (0..#{n_columns - 1}): " + "#{name_or_index.inspect}: #{inspect}" raise IndexError.new(message) end end - remove_column_raw(index) + table = remove_column_raw(index) + share_input(table) + table end # Experimental def group(*keys) Group.new(self, keys) @@ -443,58 +451,75 @@ def pack packed_arrays = columns.collect do |column| column.data.pack end - self.class.new(schema, packed_arrays) + table = self.class.new(schema, packed_arrays) + share_input(table) + table end - # @overload join(right, key, type: :inner, left_outputs: nil, right_outputs: nil) - # @!macro join_common_before - # @param right [Arrow::Table] The right table. + # Join another Table by matching with keys. # - # Join columns with `right` on join key columns. + # @!macro join_common_before + # @param right [Arrow::Table] The right table. # - # @!macro join_common_after - # @param type [Arrow::JoinType] How to join. - # @param left_outputs [::Array<String, Symbol>] Output columns in - # `self`. + # Join columns with `right` on join key columns. # - # If both of `left_outputs` and `right_outputs` aren't - # specified, all columns in `self` and `right` are - # outputted. - # @param right_outputs [::Array<String, Symbol>] Output columns in - # `right`. + # @!macro join_common_after + # @param type [Arrow::JoinType] How to join. + # @param left_outputs [::Array<String, Symbol>] Output columns in + # `self`. # - # If both of `left_outputs` and `right_outputs` aren't - # specified, all columns in `self` and `right` are - # outputted. - # @return [Arrow::Table] - # The joined `Arrow::Table`. + # If both of `left_outputs` and `right_outputs` aren't + # specified, all columns in `self` and `right` are + # outputted. + # @param right_outputs [::Array<String, Symbol>] Output columns in + # `right`. # + # If both of `left_outputs` and `right_outputs` aren't + # specified, all columns in `self` and `right` are + # outputted. + # @return [Arrow::Table] + # The joined `Arrow::Table`. + # + # @overload join(right, type: :inner, left_outputs: nil, right_outputs: nil) + # If key(s) are not supplied, common keys in self and right are used. + # # @macro join_common_before + # @macro join_common_after + # + # @since 11.0.0 + # + # @overload join(right, key, type: :inner, left_outputs: nil, right_outputs: nil) + # Join right by a key. + # + # @macro join_common_before # @param key [String, Symbol] A join key. # @macro join_common_after # # @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil) + # Join right by keys. # # @macro join_common_before # @param keys [::Array<String, Symbol>] Join keys. # @macro join_common_after # # @overload join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil) + # Join right by a key or keys mapped by a hash. # # @macro join_common_before # @param keys [Hash] Specify join keys in `self` and `right` separately. # @option keys [String, Symbol, ::Array<String, Symbol>] :left # Join keys in `self`. # @option keys [String, Symbol, ::Array<String, Symbol>] :right # Join keys in `right`. # @macro join_common_after # # @since 7.0.0 - def join(right, keys, type: :inner, left_outputs: nil, right_outputs: nil) + def join(right, keys=nil, type: :inner, left_outputs: nil, right_outputs: nil) + keys ||= (column_names & right.column_names) plan = ExecutePlan.new left_node = plan.build_source_node(self) right_node = plan.build_source_node(right) if keys.is_a?(Hash) left_keys = keys[:left] @@ -521,10 +546,12 @@ plan.build_sink_node(hash_join_node, sink_node_options) plan.validate plan.start plan.wait reader = sink_node_options.get_reader(hash_join_node.output_schema) - reader.read_all + table = reader.read_all + share_input(table) + table end alias_method :to_s_raw, :to_s def to_s(options={}) format = options[:format]