Sha256: fdf46471352dd604367948270b9aa0f9d0e87a80484cb7c58db387f7479d5453

Contents?: true

Size: 1010 Bytes

Versions: 1

Compression:

Stored size: 1010 Bytes

Contents

require "arrow"

module DatasetsArrow
  # Mixin that exposes a dataset as Apache Arrow data, backed by an on-disk
  # cache file named "data.arrow".
  #
  # The including object must provide:
  #
  # * +to_table+: its result's +to_h+ is iterated as column name / column
  #   values pairs.
  # * +cache_dir_path+: the directory that holds the cache file. It is
  #   combined with "data.arrow" via <tt>+</tt> and the result is asked
  #   +exist?+, +parent+ and +to_path+ (presumably a Pathname -- confirm
  #   against the includer).
  module Arrowable
    # Returns the dataset as an Arrow table.
    #
    # If the cache file already exists it is loaded as-is. Otherwise every
    # column of +to_table+ is converted with Arrow::ArrayBuilder, the
    # resulting table is written to the cache file, and the freshly built
    # table is returned.
    #
    # @return [Arrow::Table] the loaded or newly built table.
    def to_arrow
      data_path = arrow_data_path
      return Arrow::Table.load(data_path) if data_path.exist?

      raw_table = {}
      to_table.to_h.each do |name, values|
        raw_table[name] = Arrow::ArrayBuilder.build(values)
      end
      table = Arrow::Table.new(raw_table)
      # Nothing in this module guarantees the cache directory has been
      # created, so make sure it exists before writing; mkpath is a no-op
      # when the directory is already there.
      data_path.parent.mkpath
      table.save(data_path)
      table
    end

    # Yields each record batch of the dataset.
    #
    # When the cache file exists, batches are read from it through a
    # memory-mapped input stream. Otherwise the table is built (and cached)
    # by #to_arrow and iteration is delegated to that table.
    #
    # @yieldparam record_batch a record batch produced by the reader or by
    #   the table's own +each_record_batch+.
    # @return [Enumerator] when called without a block.
    def each_record_batch(&block)
      return to_enum(__method__) unless block_given?

      data_path = arrow_data_path
      # No cache yet: build the table first, then iterate over it in memory.
      return to_arrow.each_record_batch(&block) unless data_path.exist?

      input = Arrow::MemoryMappedInputStream.new(data_path.to_path)
      reader = Arrow::RecordBatchFileReader.new(input)
      reader.each do |record_batch|
        # Pin the input stream on the batch; presumably this keeps the
        # memory mapping from being garbage-collected while the batch is
        # still in use by the caller.
        record_batch.instance_variable_set(:@input, input)
        yield(record_batch)
      end
    end

    private

    # Location of the Arrow cache file inside the dataset's cache directory.
    def arrow_data_path
      cache_dir_path + "data.arrow"
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
red-datasets-arrow-0.0.2 lib/datasets-arrow/arrowable.rb