lib/support/base_tokenizer.rb in rley-0.7.07 vs lib/support/base_tokenizer.rb in rley-0.7.08

- old
+ new

@@ -1,25 +1,31 @@ # frozen_string_literal: true require 'strscan' require_relative '../rley/lexical/token' +# Simplistic tokenizer used mostly for testing purposes class BaseTokenizer + # @return [StringScanner] attr_reader(:scanner) + + # @return [Integer] current line number attr_reader(:lineno) + + # @return [Integer] position of start of current line in source text attr_reader(:line_start) - + class ScanError < StandardError; end - # Constructor. Initialize a tokenizer for Skeem. + # Constructor. Initialize a tokenizer. # @param source [String] Skeem text to tokenize. def initialize(source) @scanner = StringScanner.new('') restart(source) end - # @param source [String] Skeem text to tokenize. + # @param source [String] input text to tokenize. def restart(source) @scanner.string = source @lineno = 1 @line_start = 0 end @@ -32,17 +38,17 @@ tok_sequence << token unless token.nil? end return tok_sequence end - + protected - + # Patterns: # Unambiguous single character # Conditional single character: - # (e.g. '+' operator, '+' prefix for positive numbers) + # (e.g. '+' operator, '+' prefix for positive numbers) def _next_token skip_whitespaces curr_ch = scanner.peek(1) return nil if curr_ch.nil? || curr_ch.empty? @@ -55,15 +61,15 @@ raise ScanError, "Unknown token #{erroneous} on line #{lineno}" end return token end - + def recognize_token raise NotImplementedError end - + def build_token(aSymbolName, aLexeme, aFormat = :default) begin value = convert_to(aLexeme, aSymbolName, aFormat) col = scanner.pos - aLexeme.size - @line_start + 1 pos = Rley::Lexical::Position.new(@lineno, col) @@ -73,15 +79,15 @@ raise e end return token end - + def convert_to(aLexeme, _symbol_name, _format) return aLexeme end - + def skip_whitespaces pre_pos = scanner.pos loop do ws_found = false @@ -91,23 +97,18 @@ found = scanner.skip(/(?:\r\n)|\r|\n/) if found ws_found = true next_line end - # next_ch = scanner.peek(1) - # if next_ch == ';' - # cmt_found = true - # scanner.skip(/;[^\r\n]*(?:(?:\r\n)|\r|\n)?/) - # next_line - # end + break unless ws_found || cmt_found end curr_pos = scanner.pos return if curr_pos == pre_pos end - + def next_line @lineno += 1 @line_start = scanner.pos - end + end end # class