lib/support/base_tokenizer.rb in rley-0.7.07 vs lib/support/base_tokenizer.rb in rley-0.7.08
- old
+ new
@@ -1,25 +1,31 @@
# frozen_string_literal: true
require 'strscan'
require_relative '../rley/lexical/token'
+# Simplistic tokenizer used mostly for testing purposes
class BaseTokenizer
+ # @return [StringScanner]
attr_reader(:scanner)
+
+ # @return [Integer] current line number
attr_reader(:lineno)
+
+ # @return [Integer] position of start of current line in source text
attr_reader(:line_start)
-
+
# Error raised when the tokenizer meets input it cannot turn into a token.
class ScanError < StandardError
end
- # Constructor. Initialize a tokenizer for Skeem.
+ # Constructor. Initialize a tokenizer.
# @param source [String] input text to tokenize.
def initialize(source)
  # The scanner is created over an empty string; #restart then loads the
  # actual text and resets the line bookkeeping.
  @scanner = StringScanner.new('')
  restart(source)
end
- # @param source [String] Skeem text to tokenize.
+ # @param source [String] input text to tokenize.
def restart(source)
  # Hand the new text to the existing scanner object.
  @scanner.string = source

  # Line bookkeeping starts over: first line, beginning at offset 0.
  @lineno = 1
  @line_start = 0
end
@@ -32,17 +38,17 @@
tok_sequence << token unless token.nil?
end
return tok_sequence
end
-
+
protected
-
+
# Patterns:
# Unambiguous single character
# Conditional single character:
- # (e.g. '+' operator, '+' prefix for positive numbers)
+ # (e.g. '+' operator, '+' prefix for positive numbers)
def _next_token
skip_whitespaces
curr_ch = scanner.peek(1)
return nil if curr_ch.nil? || curr_ch.empty?
@@ -55,15 +61,15 @@
raise ScanError, "Unknown token #{erroneous} on line #{lineno}"
end
return token
end
-
+
# Abstract hook: this base implementation does nothing but raise, so a
# concrete tokenizer has to supply its own version.
# NOTE(review): presumably the override returns the token found at the
# current scanner position - confirm against the caller.
# @raise [NotImplementedError] always, in this base implementation.
def recognize_token
  # Name the receiver's class and the missing method so the failure is
  # self-explanatory; the exception class itself is unchanged.
  raise NotImplementedError,
        "#{self.class}#recognize_token must be implemented by a subclass"
end
-
+
def build_token(aSymbolName, aLexeme, aFormat = :default)
begin
value = convert_to(aLexeme, aSymbolName, aFormat)
col = scanner.pos - aLexeme.size - @line_start + 1
pos = Rley::Lexical::Position.new(@lineno, col)
@@ -73,15 +79,15 @@
raise e
end
return token
end
-
+
# Conversion hook used by build_token to derive a token value from a lexeme.
# This default performs no conversion at all.
# @param aLexeme [String] text of the lexeme.
# @param _symbol_name [Object] ignored by this implementation.
# @param _format [Object] ignored by this implementation.
# @return [String] the very same lexeme object that was passed in.
def convert_to(aLexeme, _symbol_name, _format)
  aLexeme
end
-
+
def skip_whitespaces
pre_pos = scanner.pos
loop do
ws_found = false
@@ -91,23 +97,18 @@
found = scanner.skip(/(?:\r\n)|\r|\n/)
if found
ws_found = true
next_line
end
- # next_ch = scanner.peek(1)
- # if next_ch == ';'
- # cmt_found = true
- # scanner.skip(/;[^\r\n]*(?:(?:\r\n)|\r|\n)?/)
- # next_line
- # end
+
break unless ws_found || cmt_found
end
curr_pos = scanner.pos
return if curr_pos == pre_pos
end
-
+
def next_line
@lineno += 1
@line_start = scanner.pos
- end
+ end
end # class