lib/lrama/grammar.rb in lrama-0.5.6 vs lib/lrama/grammar.rb in lrama-0.5.7
- old
+ new
@@ -1,5 +1,7 @@
+require "strscan"
+
require "lrama/grammar/auxiliary"
require "lrama/grammar/code"
require "lrama/grammar/error_token"
require "lrama/grammar/precedence"
require "lrama/grammar/printer"
@@ -304,10 +306,192 @@
# Returns the entries of @symbols whose #nterm? is truthy.
# The list is computed on first use and cached in @nterms.
def nterms
  @nterms ||= @symbols.select { |symbol| symbol.nterm? }
end
+ def extract_references
+ unless initial_action.nil?
+ scanner = StringScanner.new(initial_action.s_value)
+ references = []
+
+ while !scanner.eos? do
+ start = scanner.pos
+ case
+ # $ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, "$", tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+
+ # @ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/@\$/) # @$
+ references << [:at, "$", nil, start, scanner.pos - 1]
+ when scanner.scan(/@(\d+)/) # @1
+ references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
+ when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+ else
+ scanner.getch
+ end
+ end
+
+ initial_action.token_code.references = references
+ build_references(initial_action.token_code)
+ end
+
+ @printers.each do |printer|
+ scanner = StringScanner.new(printer.code.s_value)
+ references = []
+
+ while !scanner.eos? do
+ start = scanner.pos
+ case
+ # $ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, "$", tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+
+ # @ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/@\$/) # @$
+ references << [:at, "$", nil, start, scanner.pos - 1]
+ when scanner.scan(/@(\d+)/) # @1
+ references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
+ when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+ else
+ scanner.getch
+ end
+ end
+
+ printer.code.token_code.references = references
+ build_references(printer.code.token_code)
+ end
+
+ @error_tokens.each do |error_token|
+ scanner = StringScanner.new(error_token.code.s_value)
+ references = []
+
+ while !scanner.eos? do
+ start = scanner.pos
+ case
+ # $ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, "$", tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+
+ # @ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/@\$/) # @$
+ references << [:at, "$", nil, start, scanner.pos - 1]
+ when scanner.scan(/@(\d+)/) # @1
+ references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
+ when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+ else
+ scanner.getch
+ end
+ end
+
+ error_token.code.token_code.references = references
+ build_references(error_token.code.token_code)
+ end
+
+ @_rules.each do |lhs, rhs, _|
+ rhs.each_with_index do |token, index|
+ next if token.class == Lrama::Grammar::Symbol || token.type != Lrama::Lexer::Token::User_code
+
+ scanner = StringScanner.new(token.s_value)
+ references = []
+
+ while !scanner.eos? do
+ start = scanner.pos
+ case
+ # $ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, "$", tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+ when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
+ tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
+ references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
+
+ # @ references
+ # It need to wrap an identifier with brackets to use ".-" for identifiers
+ when scanner.scan(/@\$/) # @$
+ references << [:at, "$", nil, start, scanner.pos - 1]
+ when scanner.scan(/@(\d+)/) # @1
+ references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
+ when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+ when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
+ references << [:at, scanner[1], nil, start, scanner.pos - 1]
+
+ when scanner.scan(/\/\*/)
+ scanner.scan_until(/\*\//)
+ else
+ scanner.getch
+ end
+ end
+
+ token.references = references
+ token.numberize_references(lhs, rhs)
+ build_references(token)
+ end
+ end
+ end
+
+ def create_token(type, s_value, line, column)
+ t = Token.new(type: type, s_value: s_value)
+ t.line = line
+ t.column = column
+
+ return t
+ end
+
private
def find_nterm_by_id!(id)
nterms.find do |nterm|
nterm.id == id
@@ -468,10 +652,12 @@
end
end
# Fill #number and #token_id
def fill_symbol_number
- # TODO: why start from 256
+ # By default, a character literal in the grammar file
+ # uses its ASCII code as its token id,
+ # so token_id starts from 256.
token_id = 256
# YYEMPTY = -2
# YYEOF = 0
# YYerror = 1