lib/bychar.rb in bychar-1.0.1 vs lib/bychar.rb in bychar-1.1.0

- old
+ new

@@ -1,23 +1,26 @@ # -*- encoding : utf-8 -*- require 'stringio' module Bychar - VERSION = '1.0.1' + VERSION = '1.1.0' # Default buffer size is 512k DEFAULT_BUFFER_SIZE = 512 * 1024 + # Gets raised when you have exhausted the underlying IO + class EOFError < RuntimeError #:nodoc: all + end + # This object helps you build parsers that parse an IO byte by byte without having to # read byte by byte. # Reading byte by byte is very inefficient, but we want to parse byte by byte since # this makes parser construction much easier. So what we do is cache some chunk of the # passed buffer and read from that. Once exhausted there will be some caching again, # and ad infinitum until the passed buffer is exhausted class Reader - def initialize(with_io, buffer_size = DEFAULT_BUFFER_SIZE) @io = with_io @bufsize = buffer_size cache end @@ -36,12 +39,22 @@ # and from the internal cache buffer (checks whether there is anything that # can be retrieved using read_one_byte) def eof? (@buf && @buf.eos?) && @io.eof? end - + + # Since you parse char by char, you will be tempted to do it in a tight loop + # and to call eof? on each iteration. Don't. Instead, allow it to raise and do not check. + # This takes the profile time down from 36 seconds to 30 seconds for a large file. + def read_one_byte! + cache if @buf.eos? + raise EOFError if @buf.eos? + + @buf.getch + end + private - + def cache data = @io.read(@bufsize) @buf = StringScanner.new(data.to_s) # Make nil become "" end end \ No newline at end of file