lib/bychar.rb in bychar-1.0.1 vs lib/bychar.rb in bychar-1.1.0
- old
+ new
@@ -1,23 +1,26 @@
# -*- encoding : utf-8 -*-
require 'stringio'
require 'strscan'
module Bychar
- VERSION = '1.0.1'
+ VERSION = '1.1.0'
# Default buffer size is 512k
DEFAULT_BUFFER_SIZE = 512 * 1024
+ # Raised by Reader#read_one_byte! once both the internal buffer and the underlying IO are exhausted. Subclasses RuntimeError, so it is a different class from Ruby's core ::EOFError.
+ class EOFError < RuntimeError #:nodoc: all
+ end
+
# This object helps you build parsers that parse an IO byte by byte without having to
# read byte by byte.
# Reading byte by byte is very inefficient, but we want to parse byte by byte since
# this makes parser construction much easier. So what we do is cache some chunk of the
# passed buffer and read from that. Once exhausted there will be some caching again,
# and ad infinitum until the passed buffer is exhausted
class Reader
-
# Wraps +with_io+ and fills the internal buffer straight away.
# +buffer_size+ is the length passed to the IO's read on every refill.
def initialize(with_io, buffer_size = DEFAULT_BUFFER_SIZE)
  @bufsize = buffer_size
  @io = with_io
  cache # preload the first chunk so the buffer is never nil afterwards
end
@@ -36,12 +39,22 @@
# and from the internal cache buffer (checks whether there is anything that
# can be retrieved using read_one_byte)
def eof?
  # The IO is only consulted once the cached chunk has been fully consumed
  buffer_spent = @buf && @buf.eos?
  buffer_spent && @io.eof?
end
-
+
+ # Returns the next character from the internal buffer, refilling the buffer from the IO first when it is depleted. Raises EOFError when nothing is left after the refill.
+ # Since you parse char by char, you will be tempted to do it in a tight loop and to call eof? on each iteration. Don't. Instead, allow it to raise and do not check.
+ # This takes the profile time down from 36 seconds to 30 seconds for a large file.
+ def read_one_byte!
+ cache if @buf.eos? # current chunk used up: fetch the next one from the IO
+ raise EOFError if @buf.eos? # still empty after the refill, so the IO gave us nothing more
+
+ @buf.getch
+ end
+
private
-
+
# Reads the next chunk of up to @bufsize from the IO and wraps it in a fresh
# StringScanner, replacing the previous buffer.
def cache
  chunk = @io.read(@bufsize)
  # A nil chunk (nothing left to read) is turned into "" by to_s
  @buf = StringScanner.new(chunk.to_s)
end
end
\ No newline at end of file