examples/general/SRL/lib/tokenizer.rb in rley-0.5.08 vs examples/general/SRL/lib/tokenizer.rb in rley-0.5.09

- old
+ new

@@ -27,18 +27,22 @@
  @@keywords = %w[
    AND
    AT
    BETWEEN
    EXACTLY
+   FROM
    LEAST
+   LETTER
    MORE
    NEVER
    ONCE
    OPTIONAL
    OR
    TIMES
+   TO
    TWICE
+   UPPERCASE
  ].map { |x| [x, x] } .to_h

  class ScanError < StandardError; end

  def initialize(source, aGrammar)
@@ -65,21 +69,20 @@
  return nil if curr_ch.nil?

  token = nil
  if '(),'.include? curr_ch
-   # Single character token
+   # Delimiters, separators => single character token
    token = build_token(@@lexeme2name[curr_ch], scanner.getch)
  elsif (lexeme = scanner.scan(/[0-9]{2,}/))
    token = build_token('INTEGER', lexeme) # An integer has two or more digits
  elsif (lexeme = scanner.scan(/[0-9]/))
-   token = build_token('DIGIT', lexeme)
+   token = build_token('DIGIT_LIT', lexeme)
  elsif (lexeme = scanner.scan(/[a-zA-Z]{2,}/))
    token = build_token(@@keywords[lexeme.upcase], lexeme)
    # TODO: handle case unknown identifier
- elsif (lexeme = scanner.scan(/\w/))
-   puts 'Buff'
-   token = build_token('CHAR', lexeme)
+ elsif (lexeme = scanner.scan(/[a-zA-Z]((?=\s)|$)/))
+   token = build_token('LETTER_LIT', lexeme)
  else # Unknown token
    erroneous = curr_ch.nil? ? '' : curr_ch
    sequel = scanner.scan(/.{1,20}/)
    erroneous += sequel unless sequel.nil?
    raise ScanError.new("Unknown token #{erroneous}")