parser.y in lrama-0.5.8 vs parser.y in lrama-0.5.9

- old
+ new

@@ -1,23 +1,25 @@ class Lrama::Parser + expect 7 + token C_DECLARATION CHARACTER IDENT_COLON IDENTIFIER INTEGER STRING TAG + rule + input: prologue_declarations bison_declarations "%%" grammar epilogue_opt prologue_declarations: # empty | prologue_declarations prologue_declaration prologue_declaration: "%{" { - @lexer.status = :c_declaration - @lexer.end_symbol = '%}' + begin_c_declaration("%}") @grammar.prologue_first_lineno = @lexer.line } C_DECLARATION { - @lexer.status = :initial - @lexer.end_symbol = nil + end_c_declaration } "%}" { @grammar.prologue = val[2].s_value } @@ -43,75 +45,77 @@ val[1].each {|token| token.references = [] @grammar.parse_param = @grammar.build_code(:parse_param, token).token_code.s_value } } + | "%code" IDENTIFIER "{" + { + begin_c_declaration("}") + } + C_DECLARATION + { + end_c_declaration + } + "}" + { + @grammar.add_percent_code(id: val[1], code: val[4]) + } | "%initial-action" "{" { - @lexer.status = :c_declaration - @lexer.end_symbol = '}' + begin_c_declaration("}") } C_DECLARATION { - @lexer.status = :initial - @lexer.end_symbol = nil + end_c_declaration } "}" { @grammar.initial_action = @grammar.build_code(:initial_action, val[3]) } | ";" grammar_declaration: "%union" "{" { - @lexer.status = :c_declaration - @lexer.end_symbol = '}' + begin_c_declaration("}") } C_DECLARATION { - @lexer.status = :initial - @lexer.end_symbol = nil + end_c_declaration } "}" { @grammar.set_union(@grammar.build_code(:union, val[3]), val[3].line) } | symbol_declaration | "%destructor" "{" { - @lexer.status = :c_declaration - @lexer.end_symbol = '}' + begin_c_declaration("}") } C_DECLARATION { - @lexer.status = :initial - @lexer.end_symbol = nil + end_c_declaration } - "}" generic_symlist + "}" generic_symlist | "%printer" "{" { - @lexer.status = :c_declaration - @lexer.end_symbol = '}' + begin_c_declaration("}") } C_DECLARATION { - @lexer.status = :initial - @lexer.end_symbol = nil + end_c_declaration } "}" generic_symlist { @grammar.add_printer(ident_or_tags: val[6], code: @grammar.build_code(:printer, val[3]), lineno: val[3].line) } | "%error-token" "{" { - @lexer.status = :c_declaration - @lexer.end_symbol = '}' + begin_c_declaration("}") } C_DECLARATION { - @lexer.status = :initial - @lexer.end_symbol = nil + end_c_declaration } "}" generic_symlist { @grammar.add_error_token(ident_or_tags: val[6], code: @grammar.build_code(:error_token, val[3]), lineno: val[3].line) } @@ -215,31 +219,27 @@ symbol: id | string_as_id params: params "{" { - @lexer.status = :c_declaration - @lexer.end_symbol = '}' + begin_c_declaration("}") } C_DECLARATION { - @lexer.status = :initial - @lexer.end_symbol = nil + end_c_declaration } "}" { result = val[0].append(val[3]) } | "{" { - @lexer.status = :c_declaration - @lexer.end_symbol = '}' + begin_c_declaration("}") } C_DECLARATION { - @lexer.status = :initial - @lexer.end_symbol = nil + end_c_declaration } "}" { result = [val[2]] } @@ -272,11 +272,11 @@ | grammar_declaration ";" rules: id_colon named_ref_opt ":" rhs_list { lhs = val[0] - lhs.alias = val[1] + lhs.alias_name = val[1] val[3].each {|hash| @grammar.add_rule(lhs: lhs, rhs: hash[:rhs], lineno: hash[:lineno]) } } @@ -290,60 +290,57 @@ } | rhs_list ";" rhs: /* empty */ { + reset_precs result = [] - @prec_seen = false - @code_after_prec = false } + | "%empty" + { + reset_precs + result = [] + } | rhs symbol named_ref_opt { token = val[1] - token.alias = val[2] + token.alias_name = val[2] result = val[0].append(token) } + | rhs "?" + { + token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1]) + result = val[0].append(token) + } + | rhs "+" + { + token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1]) + result = val[0].append(token) + } + | rhs "*" + { + token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1]) + result = val[0].append(token) + } | rhs "{" { if @prec_seen raise "Multiple User_code after %prec" if @code_after_prec @code_after_prec = true end - @lexer.status = :c_declaration - @lexer.end_symbol = '}' + begin_c_declaration("}") } C_DECLARATION { - @lexer.status = :initial - @lexer.end_symbol = nil + end_c_declaration } "}" named_ref_opt { token = val[3] - token.alias = val[6] + token.alias_name = val[6] result = val[0].append(token) } - | "{" - { - if @prec_seen - raise "Multiple User_code after %prec" if @code_after_prec - @code_after_prec = true - end - @lexer.status = :c_declaration - @lexer.end_symbol = '}' - } - C_DECLARATION - { - @lexer.status = :initial - @lexer.end_symbol = nil - } - "}" named_ref_opt - { - token = val[2] - token.alias = val[5] - result = [token] - } | rhs "%prec" symbol { sym = @grammar.find_symbol_by_id!(val[2]) result = val[0].append(sym) @prec_seen = true @@ -355,18 +352,16 @@ id_colon: IDENT_COLON epilogue_opt: # empty | "%%" { - @lexer.status = :c_declaration - @lexer.end_symbol = '\Z' + begin_c_declaration('\Z') @grammar.epilogue_first_lineno = @lexer.line + 1 } C_DECLARATION { - @lexer.status = :initial - @lexer.end_symbol = nil + end_c_declaration @grammar.epilogue = val[2].s_value } variable: id @@ -379,29 +374,30 @@ | generic_symlist generic_symlist_item { result = val[0].append(val[1]) } generic_symlist_item: symbol | TAG - string_as_id: STRING { result = Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Ident, s_value: val[0]) } + string_as_id: STRING { result = Lrama::Lexer::Token::Ident.new(s_value: val[0]) } end ---- inner include Lrama::Report::Duration -def initialize(text, path) +def initialize(text, path, debug = false) @text = text @path = path + @yydebug = debug end def parse report_duration(:parse) do @lexer = Lrama::Lexer.new(@text) @grammar = Lrama::Grammar.new @precedence_number = 0 + reset_precs do_parse - @grammar.extract_references @grammar.prepare @grammar.compute_nullable @grammar.compute_first_set @grammar.validate! @grammar @@ -411,12 +407,40 @@ def next_token @lexer.next_token end def on_error(error_token_id, error_value, value_stack) - source = @text.split("\n")[error_value.line - 1] + if error_value.respond_to?(:line) && error_value.respond_to?(:column) + line = error_value.line + first_column = error_value.column + else + line = @lexer.line + first_column = @lexer.head_column + end + raise ParseError, <<~ERROR - #{@path}:#{@lexer.line}:#{@lexer.column}: parse error on value #{error_value.inspect} (#{token_to_str(error_token_id) || '?'}) - #{source} - #{' ' * @lexer.column}^ + #{@path}:#{line}:#{first_column}: parse error on value #{error_value.inspect} (#{token_to_str(error_token_id) || '?'}) + #{@text.split("\n")[line - 1]} + #{carrets(first_column)} ERROR +end + +private + +def reset_precs + @prec_seen = false + @code_after_prec = false +end + +def begin_c_declaration(end_symbol) + @lexer.status = :c_declaration + @lexer.end_symbol = end_symbol +end + +def end_c_declaration + @lexer.status = :initial + @lexer.end_symbol = nil +end + +def carrets(first_column) + ' ' * (first_column + 1) + '^' * (@lexer.column - first_column) end