lib/lrama/grammar.rb in lrama-0.5.6 vs lib/lrama/grammar.rb in lrama-0.5.7

- old
+ new

require "strscan"

# Returns the symbols from @symbols that answer true to #nterm?.
# The result is memoized in @nterms.
def nterms
  @nterms ||= @symbols.select(&:nterm?)
end

# Scans every piece of user code held by the grammar for `$...` / `@...`
# references and stores the result on the owning code object.
#
# Four sources are processed, in this order:
#   1. initial_action (skipped when nil)
#   2. each entry of @printers
#   3. each entry of @error_tokens
#   4. each User_code token found on the right-hand side of @_rules
#
# For sources 1-3 the references are assigned to `token_code.references`
# and `build_references` is called with that token_code. For rule actions
# the references are assigned to the token itself, then
# `numberize_references(lhs, rhs)` and `build_references(token)` are called.
#
# NOTE(review): only rule actions skip `/* ... */` comments while scanning;
# the other three sources are scanned including comment text. This keeps the
# previous behaviour - confirm whether the difference is intentional.
def extract_references
  action = initial_action
  unless action.nil?
    references = scan_code_references(action.s_value)
    action.token_code.references = references
    build_references(action.token_code)
  end

  @printers.each do |printer|
    references = scan_code_references(printer.code.s_value)
    printer.code.token_code.references = references
    build_references(printer.code.token_code)
  end

  @error_tokens.each do |error_token|
    references = scan_code_references(error_token.code.s_value)
    error_token.code.token_code.references = references
    build_references(error_token.code.token_code)
  end

  @_rules.each do |lhs, rhs, _|
    rhs.each do |token|
      # Only user code tokens carry references; grammar symbols and every
      # other token type are left untouched.
      next if token.class == Lrama::Grammar::Symbol || token.type != Lrama::Lexer::Token::User_code

      token.references = scan_code_references(token.s_value, skip_comments: true)
      token.numberize_references(lhs, rhs)
      build_references(token)
    end
  end
end

# Walks +code+ from start to end and collects every reference found.
#
# Each reference is an Array of the form
#   [kind, value, tag, first_index, last_index]
# where kind is :dollar or :at, value is "$", an Integer or a name String,
# tag is a Tag token or nil, and the indexes are the StringScanner positions
# of the first and last character of the reference.
#
# When +skip_comments+ is true, a `/*` makes the scanner jump past the next
# `*/`. If no `*/` follows, only the `/*` itself is consumed and scanning
# continues with the text after it.
def scan_code_references(code, skip_comments: false)
  scanner = StringScanner.new(code)
  references = []

  until scanner.eos?
    reference = scan_one_reference(scanner)

    if reference
      references << reference
    elsif skip_comments && scanner.scan(/\/\*/)
      scanner.scan_until(/\*\//)
    else
      scanner.getch
    end
  end

  references
end

# Tries to match a single reference at the scanner's current position.
# Returns the reference Array on success (advancing the scanner past it),
# or nil without moving the scanner when nothing matches.
def scan_one_reference(scanner)
  start = scanner.pos

  case
  # $ references
  # An identifier containing "." or "-" must be wrapped in brackets.
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
    [:dollar, "$", build_reference_tag(scanner[1]), start, scanner.pos - 1]
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
    # Explicit radix: without it Integer() reads a leading zero as octal,
    # so "$010" would become 8 and "$08" would raise ArgumentError.
    [:dollar, Integer(scanner[2], 10), build_reference_tag(scanner[1]), start, scanner.pos - 1]
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
    [:dollar, scanner[2], build_reference_tag(scanner[1]), start, scanner.pos - 1]
  when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[program] (named reference with brackets)
    [:dollar, scanner[2], build_reference_tag(scanner[1]), start, scanner.pos - 1]

  # @ references
  # An identifier containing "." or "-" must be wrapped in brackets.
  when scanner.scan(/@\$/) # @$
    [:at, "$", nil, start, scanner.pos - 1]
  when scanner.scan(/@(\d+)/) # @1
    [:at, Integer(scanner[1], 10), nil, start, scanner.pos - 1]
  when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
    [:at, scanner[1], nil, start, scanner.pos - 1]
  when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
    [:at, scanner[1], nil, start, scanner.pos - 1]
  end
end

# Wraps a matched "<tag>" string in a Tag token; returns nil when the
# reference carried no explicit tag.
def build_reference_tag(tag_text)
  return nil unless tag_text

  Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: tag_text)
end

private :scan_code_references, :scan_one_reference, :build_reference_tag

# Builds a Token from +type+ and +s_value+, records +line+ and +column+
# on it, and returns the token.
def create_token(type, s_value, line, column)
  Token.new(type: type, s_value: s_value).tap do |token|
    token.line = line
    token.column = column
  end
end