Sha256: d400ac593274a482413be2bc81668a9052ed535abdb424696e8c033406d51d29
Contents?: true
Size: 1.63 KB
Versions: 1
Compression:
Stored size: 1.63 KB
Contents
# Reads rows out of an ORC file (through the ORC Java API) and converts them
# into plain Ruby hashes keyed by the column names of the supplied schema.
class OrcFileReader
  attr_reader :reader, :orc_options, :table_schema

  # Schema data types that #read_row knows how to convert. Columns declared
  # with any other type are left out of the resulting row hash.
  SUPPORTED_TYPES = %i[integer decimal float datetime time date string].freeze

  # @param table_schema [Hash] column name => data type symbol (one of
  #   SUPPORTED_TYPES). Entries are matched to the file's columns by
  #   position, so the hash order must follow the column order of the file.
  # @param path [String] location of the ORC file to open.
  def initialize(table_schema, path='orc_file.orc')
    @orc_options = OrcReaderOptions.new
    @table_schema = table_schema
    path = Path.new(path)
    @reader = OrcFile.createReader(path, @orc_options.orc)
  end

  # Converts a single row of a row batch into a Ruby hash.
  #
  # @param row_batch [Object] batch whose +cols+ are ORC column vectors.
  # @param row_index [Integer] index of the row inside the batch
  #   (0...row_batch.size).
  # @return [Hash] column name => converted value; NULL cells map to nil.
  def read_row(row_batch, row_index)
    column_names = @table_schema.keys # hoisted: identical for every column
    orc_row = {}
    row_batch.cols.each_with_index do |column, index|
      column_name = column_names[index]
      data_type = @table_schema[column_name]
      next unless SUPPORTED_TYPES.include?(data_type)

      # A repeating vector stores its single value (and null flag) in slot 0.
      slot = column.isRepeating ? 0 : row_index
      # isNull is only meaningful when noNulls is false.
      is_null = !column.noNulls && column.isNull[slot]
      orc_row[column_name] = is_null ? nil : convert_value(column, data_type, slot)
    end
    orc_row
  end

  # Reads every row of the file.
  #
  # Batches are pulled until the record reader is exhausted, and only the
  # rows actually filled in each batch (+row_batch.size+) are converted, so
  # files holding more rows than fit into one batch are read completely.
  #
  # @return [Array<Hash>] one hash per row, in file order.
  def read_from_orc
    rows = []
    row_batch = @reader.get_schema.createRowBatch()
    record_reader = @reader.rows
    begin
      while record_reader.next_batch(row_batch)
        row_batch.size.times do |row_index|
          rows << read_row(row_batch, row_index)
        end
      end
    ensure
      # Release the underlying file handle even if a conversion fails.
      record_reader.close
    end
    rows
  end

  private

  # Converts the non-null value stored at +slot+ of +column+ into the Ruby
  # representation used for +data_type+.
  def convert_value(column, data_type, slot)
    case data_type
    when :integer
      column.vector[slot]
    when :decimal
      column.vector[slot].get_hive_decimal.to_s.to_d
    when :float
      # The value is handed over as a double, so a 32-bit float such as
      # 0.0005 shows up as 0.0005000000237487257.
      column.vector[slot]
    when :datetime
      DateTime.strptime(column.time[slot].to_s, '%Q').to_time.to_datetime
    when :time
      Time.strptime(column.time[slot].to_s, '%Q')
    when :date
      # Stored as a day offset that is added to 1970-01-01.
      Date.new(1970,1,1) + column.vector[slot]
    when :string
      column.toString(slot)
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
orcfile-1.0.0 | lib/orc_file_reader.rb |