Sha256: 4951b17384f0cf7cae85ca2e763c2ec317cb67711a030e41dc03fc5ae892254a

Contents?: true

Size: 1.93 KB

Versions: 1

Compression:

Stored size: 1.93 KB

Contents

# -*- encoding : utf-8 -*-
require 'stringio'
require 'strscan'

module Bychar
  VERSION = '1.1.0'

  # Default buffer size is 512k
  DEFAULT_BUFFER_SIZE = 512 * 1024

  # Gets raised by Reader#read_one_byte! when you have exhausted the underlying IO.
  # Note that inside the Bychar namespace this shadows Ruby's top-level ::EOFError.
  class EOFError < RuntimeError  #:nodoc: all
  end

  # This object helps you build parsers that parse an IO byte by byte without having to
  # read byte by byte.
  # Reading byte by byte is very inefficient, but we want to parse byte by byte since
  # this makes parser construction much easier. So what we do is read a chunk of the
  # passed IO into an internal buffer and serve bytes from that. Once the buffer is
  # exhausted the next chunk is read, and so on until the passed IO is exhausted.
  class Reader

    # Wraps +with_io+ and immediately reads the first chunk from it.
    #
    # with_io::     any object that responds to +read(length)+ and +eof?+
    # buffer_size:: how many bytes to request from the IO per refill. +nil+ is passed
    #               straight through to +read+ (for a real IO: read everything at once).
    #
    # Raises ArgumentError for a zero or negative +buffer_size+: a zero-length read
    # never advances the IO, so the reader would report "no byte" forever while
    # eof? stays false.
    def initialize(with_io, buffer_size = DEFAULT_BUFFER_SIZE)
      if buffer_size && buffer_size <= 0
        raise ArgumentError, "buffer_size must be positive, got #{buffer_size.inspect}"
      end

      @io = with_io
      @bufsize = buffer_size
      cache
    end

    # Will transparently read one byte off the contained IO, maintaining the internal cache.
    # If the cache has been depleted it will read a big chunk from the IO and cache it and then
    # return the byte. Returns a one-byte String, or nil once the IO is exhausted.
    def read_one_byte
      cache if @buf.eos?

      # get_byte (unlike getch) always consumes exactly one byte, whatever the
      # encoding of the cached string is. It returns nil when the fresh chunk is empty.
      @buf.get_byte
    end

    # Tells whether all the data has been both read from the passed IO
    # and from the internal cache buffer (checks whether there is anything that
    # can be retrieved using read_one_byte)
    def eof?
      # @buf is always set, since initialize primes the cache
      @buf.eos? && @io.eof?
    end

    # Since you parse char by char, you will be tempted to do it in a tight loop
    # and to call eof? on each iteration. Don't. Instead, allow it to raise and do not check.
    # This takes the profile time down from 36 seconds to 30 seconds for a large file.
    #
    # Returns a one-byte String. Raises Bychar::EOFError when nothing is left to read.
    def read_one_byte!
      cache if @buf.eos?
      raise EOFError if @buf.eos?

      @buf.get_byte
    end

    private

    # Reads the next chunk from the IO and replaces the internal buffer with it.
    def cache
      data = @io.read(@bufsize)
      @buf = StringScanner.new(data.to_s) # Make nil become ""
    end
  end
end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
bychar-1.1.0 lib/bychar.rb