parser.y in lrama-0.5.9 vs parser.y in lrama-0.5.10

- lines prefixed with `-` are from the old version (lrama 0.5.9)
+ lines prefixed with `+` are from the new version (lrama 0.5.10)

@@ -1,7 +1,7 @@ class Lrama::Parser - expect 7 + expect 1 token C_DECLARATION CHARACTER IDENT_COLON IDENTIFIER INTEGER STRING TAG rule @@ -34,19 +34,17 @@ | "%require" STRING | "%param" params | "%lex-param" params { val[1].each {|token| - token.references = [] - @grammar.lex_param = @grammar.build_code(:lex_param, token).token_code.s_value + @grammar.lex_param = Grammar::Code::NoReferenceCode.new(type: :lex_param, token_code: token).token_code.s_value } } | "%parse-param" params { val[1].each {|token| - token.references = [] - @grammar.parse_param = @grammar.build_code(:parse_param, token).token_code.s_value + @grammar.parse_param = Grammar::Code::NoReferenceCode.new(type: :parse_param, token_code: token).token_code.s_value } } | "%code" IDENTIFIER "{" { begin_c_declaration("}") @@ -67,11 +65,11 @@ { end_c_declaration } "}" { - @grammar.initial_action = @grammar.build_code(:initial_action, val[3]) + @grammar.initial_action = Grammar::Code::InitialActionCode.new(type: :initial_action, token_code: val[3]) } | ";" grammar_declaration: "%union" "{" { @@ -81,11 +79,14 @@ { end_c_declaration } "}" { - @grammar.set_union(@grammar.build_code(:union, val[3]), val[3].line) + @grammar.set_union( + Grammar::Code::NoReferenceCode.new(type: :union, token_code: val[3]), + val[3].line + ) } | symbol_declaration | "%destructor" "{" { begin_c_declaration("}") @@ -103,11 +104,15 @@ { end_c_declaration } "}" generic_symlist { - @grammar.add_printer(ident_or_tags: val[6], code: @grammar.build_code(:printer, val[3]), lineno: val[3].line) + @grammar.add_printer( + ident_or_tags: val[6], + token_code: val[3], + lineno: val[3].line + ) } | "%error-token" "{" { begin_c_declaration("}") } @@ -115,11 +120,15 @@ { end_c_declaration } "}" generic_symlist { - @grammar.add_error_token(ident_or_tags: val[6], code: @grammar.build_code(:error_token, val[3]), lineno: val[3].line) + @grammar.add_error_token( + ident_or_tags: val[6], + token_code: val[3], + lineno: val[3].line + ) } 
symbol_declaration: "%token" token_declarations | "%type" symbol_declarations { @@ -250,22 +259,22 @@ } | TAG token_declaration_list_for_precedence { result = [{tag: val[0], tokens: val[1]}] } - | token_declarations_for_precedence token_declaration_list_for_precedence + | token_declarations_for_precedence TAG token_declaration_list_for_precedence { - result = val[0].append({tag: nil, tokens: val[1]}) + result = val[0].append({tag: val[1], tokens: val[2]}) } token_declaration_list_for_precedence: token_declaration_for_precedence { result = [val[0]] } | token_declaration_list_for_precedence token_declaration_for_precedence { result = val[0].append(val[1]) } token_declaration_for_precedence: id - id: IDENTIFIER { raise "Ident after %prec" if @prec_seen } - | CHARACTER { raise "Char after %prec" if @prec_seen } + id: IDENTIFIER { on_action_error("ident after %prec", val[0]) if @prec_seen } + | CHARACTER { on_action_error("char after %prec", val[0]) if @prec_seen } grammar: rules_or_grammar_declaration | grammar rules_or_grammar_declaration rules_or_grammar_declaration: rules @@ -273,60 +282,78 @@ rules: id_colon named_ref_opt ":" rhs_list { lhs = val[0] lhs.alias_name = val[1] - val[3].each {|hash| - @grammar.add_rule(lhs: lhs, rhs: hash[:rhs], lineno: hash[:lineno]) - } + val[3].each do |builder| + builder.lhs = lhs + builder.complete_input + @grammar.add_rule_builder(builder) + end } rhs_list: rhs { - result = [{rhs: val[0], lineno: val[0].first&.line || @lexer.line - 1}] + builder = val[0] + if !builder.line + builder.line = @lexer.line - 1 + end + result = [builder] } | rhs_list "|" rhs { - result = val[0].append({rhs: val[2], lineno: val[2].first&.line || @lexer.line - 1}) + builder = val[2] + if !builder.line + builder.line = @lexer.line - 1 + end + result = val[0].append(builder) } | rhs_list ";" rhs: /* empty */ { reset_precs - result = [] + result = Grammar::RuleBuilder.new(@rule_counter, @midrule_action_counter) } | "%empty" { reset_precs - result = [] + 
result = Grammar::RuleBuilder.new(@rule_counter, @midrule_action_counter) } | rhs symbol named_ref_opt { token = val[1] token.alias_name = val[2] - result = val[0].append(token) + builder = val[0] + builder.add_rhs(token) + result = builder } - | rhs "?" - { - token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1]) - result = val[0].append(token) - } - | rhs "+" - { - token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1]) - result = val[0].append(token) - } - | rhs "*" - { - token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1]) - result = val[0].append(token) - } + | rhs IDENTIFIER parameterizing_suffix + { + token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[2], location: @lexer.location, args: [val[1]]) + builder = val[0] + builder.add_rhs(token) + result = builder + } + | rhs IDENTIFIER "(" symbol ")" + { + token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1].s_value, location: @lexer.location, args: [val[3]]) + builder = val[0] + builder.add_rhs(token) + result = builder + } + | rhs IDENTIFIER "(" symbol "," symbol ")" + { + token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1].s_value, location: @lexer.location, args: [val[3], val[5]]) + builder = val[0] + builder.add_rhs(token) + result = builder + } | rhs "{" { if @prec_seen - raise "Multiple User_code after %prec" if @code_after_prec + on_action_error("multiple User_code after %prec", val[0]) if @code_after_prec @code_after_prec = true end begin_c_declaration("}") } C_DECLARATION @@ -335,19 +362,27 @@ } "}" named_ref_opt { token = val[3] token.alias_name = val[6] - result = val[0].append(token) + builder = val[0] + builder.user_code = token + result = builder } | rhs "%prec" symbol { sym = @grammar.find_symbol_by_id!(val[2]) - result = val[0].append(sym) @prec_seen = true + builder = val[0] + builder.precedence_sym = sym + result = builder } + parameterizing_suffix: "?" 
+ | "+" + | "*" + named_ref_opt: # empty | '[' IDENTIFIER ']' { result = val[1].s_value } id_colon: IDENT_COLON @@ -385,47 +420,69 @@ def initialize(text, path, debug = false) @text = text @path = path @yydebug = debug + @rule_counter = Lrama::Grammar::Counter.new(0) + @midrule_action_counter = Lrama::Grammar::Counter.new(1) end def parse report_duration(:parse) do @lexer = Lrama::Lexer.new(@text) - @grammar = Lrama::Grammar.new + @grammar = Lrama::Grammar.new(@rule_counter) @precedence_number = 0 reset_precs do_parse @grammar.prepare - @grammar.compute_nullable - @grammar.compute_first_set @grammar.validate! @grammar end end def next_token @lexer.next_token end def on_error(error_token_id, error_value, value_stack) - if error_value.respond_to?(:line) && error_value.respond_to?(:column) - line = error_value.line - first_column = error_value.column + if error_value.is_a?(Lrama::Lexer::Token) + line = error_value.first_line + first_column = error_value.first_column + last_column = error_value.last_column + value = "'#{error_value.s_value}'" else line = @lexer.line first_column = @lexer.head_column + last_column = @lexer.column + value = error_value.inspect end raise ParseError, <<~ERROR - #{@path}:#{line}:#{first_column}: parse error on value #{error_value.inspect} (#{token_to_str(error_token_id) || '?'}) + #{@path}:#{line}:#{first_column}: parse error on value #{value} (#{token_to_str(error_token_id) || '?'}) #{@text.split("\n")[line - 1]} - #{carrets(first_column)} + #{carrets(first_column, last_column)} ERROR end +def on_action_error(error_message, error_value) + if error_value.is_a?(Lrama::Lexer::Token) + line = error_value.first_line + first_column = error_value.first_column + last_column = error_value.last_column + else + line = @lexer.line + first_column = @lexer.head_column + last_column = @lexer.column + end + + raise ParseError, <<~ERROR + #{@path}:#{line}: #{error_message} + #{@text.split("\n")[line - 1]} + #{carrets(first_column, last_column)} + ERROR 
+end + private def reset_precs @prec_seen = false @code_after_prec = false @@ -439,8 +496,8 @@ def end_c_declaration @lexer.status = :initial @lexer.end_symbol = nil end -def carrets(first_column) - ' ' * (first_column + 1) + '^' * (@lexer.column - first_column) +def carrets(first_column, last_column) + ' ' * (first_column + 1) + '^' * (last_column - first_column) end