Sha256: 1df146018183f9661ca11656dc8a57c9223e0fcd0d4b984f43bbbaf0a07f4c98

Contents?: true

Size: 1.85 KB

Versions: 37

Compression:

Stored size: 1.85 KB

Contents

module Embulk
  module Guess
    # Guess plugin registered under the name 'json'. It proposes the "json"
    # parser type when the sample data yields at least one JSON object.
    class JsonGuessPlugin < GuessPlugin
      Plugin.register_guess('json', self)

      java_import 'com.google.common.collect.Lists'
      java_import 'java.io.ByteArrayInputStream'
      java_import 'org.embulk.spi.Exec'
      java_import 'org.embulk.spi.json.JsonParser'
      java_import 'org.embulk.spi.json.JsonParseException'
      java_import 'org.embulk.spi.util.FileInputInputStream'
      java_import 'org.embulk.spi.util.InputStreamFileInput'

      # Decides whether the sample looks like a sequence of JSON objects.
      #
      # Parsing stops at the first JsonParseException (including the one raised
      # here for a non-object value); objects parsed before that point still
      # count, so a sample whose tail is cut mid-document is still accepted.
      #
      # @param config [#fetch] guess configuration; only "parser" => "type" is
      #   read, and guessing is skipped when it is set to anything but "json"
      # @param sample_buffer [#to_java_bytes] sample of the input data
      #   (presumably a JRuby String -- confirm against the caller)
      # @return [Hash] {"parser" => {"type" => "json"}} when at least one JSON
      #   object was parsed, otherwise an empty Hash
      def guess(config, sample_buffer)
        return {} unless config.fetch("parser", {}).fetch("type", "json") == "json"

        # Use org.embulk.spi.json.JsonParser to respond to multi-line Json
        json_parser = new_json_parser(sample_buffer)
        one_json_parsed = false
        begin
          while (v = json_parser.next)
            # "v" needs to be JSON object type (isMapValue) because:
            # 1) Single-column CSV can be mis-guessed as JSON if JSON non-objects are accepted.
            # 2) JsonParserPlugin accepts only the JSON object type.
            raise JsonParseException.new("v must be JSON object type") unless v.isMapValue
            one_json_parsed = true
          end
        rescue JsonParseException
          # Deliberately ignored: a parse failure only ends the scan; whatever
          # was parsed before it decides the result.
        ensure
          # Always close the parser stream, on success, on parse failure and on
          # any unexpected error, so the input opened in new_json_parser is not
          # left open. NOTE(review): closing presumably also releases the buffer
          # obtained from the buffer allocator -- confirm against Embulk.
          json_parser.close
        end

        # Accept if JsonParser could parse even one JSON object.
        one_json_parsed ? {"parser" => {"type" => "json"}} : {}
      end

      private

      # Opens a JsonParser stream over the bytes of +buffer+.
      #
      # The bytes are wrapped in a single in-memory input stream, exposed as a
      # one-file file input, and positioned on that file before being handed
      # to the parser. The caller is responsible for closing the result.
      #
      # @param buffer [#to_java_bytes] sample data to parse
      # @return the stream returned by JsonParser#open; it is read with #next
      def new_json_parser(buffer)
        input_streams = Lists::newArrayList(ByteArrayInputStream.new(buffer.to_java_bytes))
        iterator_provider = InputStreamFileInput::IteratorProvider.new(input_streams)
        file_input = InputStreamFileInput.new(Java::SPI::Exec.getBufferAllocator, iterator_provider)
        input = FileInputInputStream.new(file_input)
        # Advance to the first (and only) file so subsequent reads return its bytes.
        input.nextFile
        JsonParser.new.open(input)
      end
    end
  end
end

Version data entries

37 entries across 37 versions & 1 rubygems

Version Path
embulk-0.10.26-java lib/embulk/guess/json.rb
embulk-0.10.25-java lib/embulk/guess/json.rb
embulk-0.10.24-java lib/embulk/guess/json.rb
embulk-0.8.39 lib/embulk/guess/json.rb
embulk-0.8.39-java lib/embulk/guess/json.rb
embulk-0.8.38 lib/embulk/guess/json.rb
embulk-0.8.38-java lib/embulk/guess/json.rb
embulk-0.8.37 lib/embulk/guess/json.rb
embulk-0.8.37-java lib/embulk/guess/json.rb
embulk-0.8.36 lib/embulk/guess/json.rb
embulk-0.8.36-java lib/embulk/guess/json.rb
embulk-0.8.35 lib/embulk/guess/json.rb
embulk-0.8.35-java lib/embulk/guess/json.rb
embulk-0.8.34 lib/embulk/guess/json.rb
embulk-0.8.34-java lib/embulk/guess/json.rb
embulk-0.8.33 lib/embulk/guess/json.rb
embulk-0.8.33-java lib/embulk/guess/json.rb
embulk-0.8.32 lib/embulk/guess/json.rb
embulk-0.8.32-java lib/embulk/guess/json.rb
embulk-0.8.31 lib/embulk/guess/json.rb