Sha256: e06af2fba408e65fd4512c54a7c68bd041955b0694349adefc95a088933ffd62

Contents?: true

Size: 904 Bytes

Versions: 27

Compression:

Stored size: 904 Bytes

Contents

require 'marc'
require 'json'
require 'zlib'


# Reads a newline-delimited JSON (NDJ) stream in which each line is one
# marc-in-json string, yielding every line as a MARC::Record.
# UTF-8 encoding is required.
#
# When the 'command_line.filename' setting ends in ".gz", the input stream is
# wrapped in a Zlib::GzipReader (read as UTF-8) so compressed files can be
# consumed directly.
class Traject::NDJReader
  include Enumerable

  # input_stream - line-oriented, IO-like object; it is iterated with
  #                #each_with_index, and handed to Zlib::GzipReader when the
  #                filename setting indicates gzip.
  # settings     - hash-like settings object. Keys read here:
  #                'command_line.filename' (optional) and :logger (optional).
  def initialize(input_stream, settings)
    @settings = settings
    @input_stream = input_stream

    # to_s keeps a missing (nil) or non-String filename from raising;
    # end_with? anchors at the true end of the string, unlike /$/.
    if @settings['command_line.filename'].to_s.end_with?('.gz')
      @input_stream = Zlib::GzipReader.new(@input_stream, :external_encoding => "UTF-8")
    end
  end

  # Returns the logger supplied in settings[:logger]; when none was given,
  # falls back to a Yell logger on STDERR restricted to levels above fatal,
  # i.e. effectively a null logger. The result is memoized.
  def logger
    @logger ||= (@settings[:logger] || Yell.new(STDERR, :level => "gt.fatal")) # null logger
  end

  # Yields one MARC::Record per input line.
  #
  # Lines that cannot be parsed as JSON or turned into a MARC::Record are
  # logged at error level (with their 1-based line number) and skipped.
  # Errors raised by the caller's block are NOT rescued here and propagate
  # to the caller.
  #
  # Returns an Enumerator when called without a block.
  def each
    return enum_for(:each) unless block_given?

    @input_stream.each_with_index do |json, i|
      record = begin
        MARC::Record.new_from_hash(JSON.parse(json))
      rescue StandardError => e
        # StandardError only: Interrupt, SystemExit, NoMemoryError etc. must
        # not be swallowed. each_with_index is zero-based, so report i + 1.
        logger.error("Problem with JSON record on line #{i + 1}: #{e.message}")
        nil
      end

      # Yield outside the rescue so consumer failures are not misreported as
      # JSON problems and silently dropped.
      yield record if record
    end
  end

end


Version data entries

27 entries across 27 versions & 1 rubygems

Version Path
traject-2.3.2-java lib/traject/ndj_reader.rb
traject-2.3.2 lib/traject/ndj_reader.rb
traject-2.3.1-java lib/traject/ndj_reader.rb
traject-2.3.1 lib/traject/ndj_reader.rb
traject-2.3.0-java lib/traject/ndj_reader.rb
traject-2.3.0 lib/traject/ndj_reader.rb
traject-2.2.1-java lib/traject/ndj_reader.rb
traject-2.2.1 lib/traject/ndj_reader.rb
traject-2.2.0 lib/traject/ndj_reader.rb
traject-2.2.0-java lib/traject/ndj_reader.rb
traject-2.1.0-java lib/traject/ndj_reader.rb
traject-2.1.0 lib/traject/ndj_reader.rb
traject-2.0.2-java lib/traject/ndj_reader.rb
traject-2.0.2 lib/traject/ndj_reader.rb
traject-2.0.1 lib/traject/ndj_reader.rb
traject-2.0.1-java lib/traject/ndj_reader.rb
traject-2.0.0-java lib/traject/ndj_reader.rb
traject-2.0.0 lib/traject/ndj_reader.rb
traject-2.0.0.rc.2-java lib/traject/ndj_reader.rb
traject-2.0.0.rc.2 lib/traject/ndj_reader.rb