Sha256: 9a9cdb375aff8d1664d0065d2044e25473b25fd910ea1bd123fff5446764c7ce

Contents?: true

Size: 1.86 KB

Versions: 7

Compression:

Stored size: 1.86 KB

Contents

# encoding: UTF-8

module JSON
  module Stream

    # A character buffer that expects a UTF-8 encoded stream of bytes.
    # This handles truncated multi-byte characters properly so we can just
    # feed it binary data and receive a properly formatted UTF-8 String as
    # output. See here for UTF-8 parsing details:
    # http://en.wikipedia.org/wiki/UTF-8
    # http://tools.ietf.org/html/rfc3629#section-3
    class Buffer
      def initialize
        @state, @buf, @need = :start, [], 0
      end

      # Fill the buffer with a String of binary UTF-8 encoded bytes. Returns
      # as much of the data in a UTF-8 String as we have. Truncated multi-byte
      # characters are saved in the buffer until the next call to this method
      # where we expect to receive the rest of the multi-byte character.
      def <<(data)
        bytes = []
        data.bytes.each do |b|
          case @state
          when :start
            if b < 128
              bytes << b
            elsif b >= 192
              @state = :multi_byte
              @buf << b
              @need = case
                when b >= 240 then 4
                when b >= 224 then 3
                when b >= 192 then 2 end
            else
              error('Expected start of multi-byte or single byte char')
            end
          when :multi_byte
            if b > 127 && b < 192
              @buf << b
              if @buf.size == @need
                bytes += @buf.slice!(0, @buf.size)
                @state = :start
              end
            else
              error('Expected continuation byte')
            end
          end
        end
        bytes.pack('C*').force_encoding(Encoding::UTF_8).tap do |str|
          error('Invalid UTF-8 byte sequence') unless str.valid_encoding?
        end
      end

      private

      def error(message)
        raise ParserError, message
      end
    end

  end
end

Version data entries

7 entries across 7 versions & 2 rubygems

Version Path
json-stream-path-0.1.0 lib/json/stream/buffer.rb
json-stream-path-0.0.3 lib/json/stream/buffer.rb
json-stream-path-0.0.2 lib/json/stream/buffer.rb
json-stream-path-0.0.1 lib/json/stream/buffer.rb
json-stream-0.1.2 lib/json/stream/buffer.rb
json-stream-0.1.1 lib/json/stream/buffer.rb
json-stream-0.1.0 lib/json/stream/buffer.rb