lib/ebnf/ll1/scanner.rb in ebnf-0.3.9 vs lib/ebnf/ll1/scanner.rb in ebnf-1.0.0
- old
+ new
@@ -1,5 +1,6 @@
+# coding: utf-8
require 'strscan' unless defined?(StringScanner)
module EBNF::LL1
##
# Overload StringScanner with file operations
@@ -7,12 +8,12 @@
# * Reloads scanner as required until EOF.
# * Loads up to a high-water mark and reloads when the remaining size falls below a low-water mark.
#
# FIXME: Only implements the subset required by the Lexer for now.
class Scanner < StringScanner
- HIGH_WATER = 10240
- LOW_WATER = 2048 # Hopefully large enough to deal with long multi-line comments
+ HIGH_WATER = 512 * 1024 # Hopefully large enough to deal with long multi-line comments
+ LOW_WATER = 4 * 1024
##
# @return [IO, StringIO]
attr_reader :input
@@ -23,18 +24,18 @@
# @param [Hash{Symbol => Object}] options
# @option options[Integer] :high_water (HIGH_WATER)
# @option options[Integer] :low_water (LOW_WATER)
# @return [Scanner]
def initialize(input, options = {})
  # Start from the defaults and lay the caller's options over them, so that
  # an explicit :high_water / :low_water is honoured. Merging in the other
  # direction would let the defaults overwrite whatever the caller passed.
  # A nil value for either key falls back to the default.
  defaults = {high_water: HIGH_WATER, low_water: LOW_WATER}
  @options = defaults.merge(options) do |_key, default, given|
    given.nil? ? default : given
  end

  if input.respond_to?(:read)
    # IO-like input: begin with an empty buffer and fill it from the stream.
    @input = input
    super("")
    feed_me
  else
    # Anything else is treated as the complete text to scan.
    super(encode_utf8(input.to_s))
  end
end
##
# Returns the "rest" of the line, or the next line if at EOL (i.e. everything after the scan pointer).
@@ -93,24 +94,34 @@
# @return [String]
def scan(pattern)
  # Top up the buffer before matching so the pattern sees as much input as possible.
  feed_me
  # Bare `super` forwards `pattern` unchanged to StringScanner#scan.
  matched = super
  # Tag the match (or pass through nil on no match) as UTF-8.
  encode_utf8(matched)
end
-
- private
- # Maintain low-water mark
- def feed_me
- if rest_size < @options[:low_water] && @input && !@input.eof?
- # Read up to high-water mark ensuring we're at an end of line
+
##
# Ensures that the input buffer is filled to the high-water mark, or to end of
# file, whichever comes first. Useful when matching tokens that may be longer
# than the low-water mark.
#
# Does nothing when there is no backing input, when the input is exhausted, or
# when the unscanned remainder already reaches the high-water mark.
#
# @return [void]
def ensure_buffer_full
  return unless @input && !@input.eof?

  # A previous fill may have overshot the high-water mark, because it always
  # reads on to the end of the current line. The shortfall can therefore be
  # zero or negative, and reading with a negative length raises ArgumentError.
  shortfall = @options[:high_water] - rest_size
  return unless shortfall > 0

  chunk = encode_utf8(@input.read(shortfall))
  return unless chunk

  # Read up to the end of the current line so the buffer never stops mid-line.
  line = @input.eof? ? nil : @input.gets
  chunk << encode_utf8(line) if line
  self << chunk
end
+ private
# Maintain the low-water mark: refill the buffer once the unscanned
# remainder has dropped below it.
def feed_me
  return unless rest_size < @options[:low_water]

  ensure_buffer_full
end
+
# Tag +string+ as UTF-8 without transcoding its bytes.
#
# nil/false and strings already tagged UTF-8 are returned untouched. A frozen
# string is duplicated first, so the caller's object is left unmodified; any
# other string is re-tagged in place.
#
# @param [String, nil] string
# @return [String, nil]
def encode_utf8(string)
  return string unless string
  return string if string.encoding == Encoding::UTF_8

  target = string.frozen? ? string.dup : string
  # force_encoding returns its receiver, so this is also the return value.
  target.force_encoding(Encoding::UTF_8)
end
end
end
\ No newline at end of file