parser.y in lrama-0.5.9 vs parser.y in lrama-0.5.10

- lines prefixed with `-` are from the old version (lrama 0.5.9)
+ lines prefixed with `+` are from the new version (lrama 0.5.10)

@@ -1,7 +1,7 @@ class Lrama::Parser - expect 7 + expect 1 token C_DECLARATION CHARACTER IDENT_COLON IDENTIFIER INTEGER STRING TAG rule @@ -34,19 +34,17 @@ | "%require" STRING | "%param" params | "%lex-param" params { val[1].each {|token| - token.references = [] - @grammar.lex_param = @grammar.build_code(:lex_param, token).token_code.s_value + @grammar.lex_param = Grammar::Code::NoReferenceCode.new(type: :lex_param, token_code: token).token_code.s_value } } | "%parse-param" params { val[1].each {|token| - token.references = [] - @grammar.parse_param = @grammar.build_code(:parse_param, token).token_code.s_value + @grammar.parse_param = Grammar::Code::NoReferenceCode.new(type: :parse_param, token_code: token).token_code.s_value } } | "%code" IDENTIFIER "{" { begin_c_declaration("}") @@ -67,11 +65,11 @@ { end_c_declaration } "}" { - @grammar.initial_action = @grammar.build_code(:initial_action, val[3]) + @grammar.initial_action = Grammar::Code::InitialActionCode.new(type: :initial_action, token_code: val[3]) } | ";" grammar_declaration: "%union" "{" { @@ -81,11 +79,14 @@ { end_c_declaration } "}" { - @grammar.set_union(@grammar.build_code(:union, val[3]), val[3].line) + @grammar.set_union( + Grammar::Code::NoReferenceCode.new(type: :union, token_code: val[3]), + val[3].line + ) } | symbol_declaration | "%destructor" "{" { begin_c_declaration("}") @@ -103,11 +104,15 @@ { end_c_declaration } "}" generic_symlist { - @grammar.add_printer(ident_or_tags: val[6], code: @grammar.build_code(:printer, val[3]), lineno: val[3].line) + @grammar.add_printer( + ident_or_tags: val[6], + token_code: val[3], + lineno: val[3].line + ) } | "%error-token" "{" { begin_c_declaration("}") } @@ -115,11 +120,15 @@ { end_c_declaration } "}" generic_symlist { - @grammar.add_error_token(ident_or_tags: val[6], code: @grammar.build_code(:error_token, val[3]), lineno: val[3].line) + @grammar.add_error_token( + ident_or_tags: val[6], + token_code: val[3], + lineno: val[3].line + ) } 
symbol_declaration: "%token" token_declarations | "%type" symbol_declarations { @@ -250,22 +259,22 @@ } | TAG token_declaration_list_for_precedence { result = [{tag: val[0], tokens: val[1]}] } - | token_declarations_for_precedence token_declaration_list_for_precedence + | token_declarations_for_precedence TAG token_declaration_list_for_precedence { - result = val[0].append({tag: nil, tokens: val[1]}) + result = val[0].append({tag: val[1], tokens: val[2]}) } token_declaration_list_for_precedence: token_declaration_for_precedence { result = [val[0]] } | token_declaration_list_for_precedence token_declaration_for_precedence { result = val[0].append(val[1]) } token_declaration_for_precedence: id - id: IDENTIFIER { raise "Ident after %prec" if @prec_seen } - | CHARACTER { raise "Char after %prec" if @prec_seen } + id: IDENTIFIER { on_action_error("ident after %prec", val[0]) if @prec_seen } + | CHARACTER { on_action_error("char after %prec", val[0]) if @prec_seen } grammar: rules_or_grammar_declaration | grammar rules_or_grammar_declaration rules_or_grammar_declaration: rules @@ -273,60 +282,78 @@ rules: id_colon named_ref_opt ":" rhs_list { lhs = val[0] lhs.alias_name = val[1] - val[3].each {|hash| - @grammar.add_rule(lhs: lhs, rhs: hash[:rhs], lineno: hash[:lineno]) - } + val[3].each do |builder| + builder.lhs = lhs + builder.complete_input + @grammar.add_rule_builder(builder) + end } rhs_list: rhs { - result = [{rhs: val[0], lineno: val[0].first&.line || @lexer.line - 1}] + builder = val[0] + if !builder.line + builder.line = @lexer.line - 1 + end + result = [builder] } | rhs_list "|" rhs { - result = val[0].append({rhs: val[2], lineno: val[2].first&.line || @lexer.line - 1}) + builder = val[2] + if !builder.line + builder.line = @lexer.line - 1 + end + result = val[0].append(builder) } | rhs_list ";" rhs: /* empty */ { reset_precs - result = [] + result = Grammar::RuleBuilder.new(@rule_counter, @midrule_action_counter) } | "%empty" { reset_precs - result = [] + 
result = Grammar::RuleBuilder.new(@rule_counter, @midrule_action_counter) } | rhs symbol named_ref_opt { token = val[1] token.alias_name = val[2] - result = val[0].append(token) + builder = val[0] + builder.add_rhs(token) + result = builder } - | rhs "?" - { - token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1]) - result = val[0].append(token) - } - | rhs "+" - { - token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1]) - result = val[0].append(token) - } - | rhs "*" - { - token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1]) - result = val[0].append(token) - } + | rhs IDENTIFIER parameterizing_suffix + { + token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[2], location: @lexer.location, args: [val[1]]) + builder = val[0] + builder.add_rhs(token) + result = builder + } + | rhs IDENTIFIER "(" symbol ")" + { + token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1].s_value, location: @lexer.location, args: [val[3]]) + builder = val[0] + builder.add_rhs(token) + result = builder + } + | rhs IDENTIFIER "(" symbol "," symbol ")" + { + token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1].s_value, location: @lexer.location, args: [val[3], val[5]]) + builder = val[0] + builder.add_rhs(token) + result = builder + } | rhs "{" { if @prec_seen - raise "Multiple User_code after %prec" if @code_after_prec + on_action_error("multiple User_code after %prec", val[0]) if @code_after_prec @code_after_prec = true end begin_c_declaration("}") } C_DECLARATION @@ -335,19 +362,27 @@ } "}" named_ref_opt { token = val[3] token.alias_name = val[6] - result = val[0].append(token) + builder = val[0] + builder.user_code = token + result = builder } | rhs "%prec" symbol { sym = @grammar.find_symbol_by_id!(val[2]) - result = val[0].append(sym) @prec_seen = true + builder = val[0] + builder.precedence_sym = sym + result = builder } + parameterizing_suffix: "?" 
+ | "+" + | "*" + named_ref_opt: # empty | '[' IDENTIFIER ']' { result = val[1].s_value } id_colon: IDENT_COLON @@ -385,47 +420,69 @@ def initialize(text, path, debug = false) @text = text @path = path @yydebug = debug + @rule_counter = Lrama::Grammar::Counter.new(0) + @midrule_action_counter = Lrama::Grammar::Counter.new(1) end def parse report_duration(:parse) do @lexer = Lrama::Lexer.new(@text) - @grammar = Lrama::Grammar.new + @grammar = Lrama::Grammar.new(@rule_counter) @precedence_number = 0 reset_precs do_parse @grammar.prepare - @grammar.compute_nullable - @grammar.compute_first_set @grammar.validate! @grammar end end def next_token @lexer.next_token end def on_error(error_token_id, error_value, value_stack) - if error_value.respond_to?(:line) && error_value.respond_to?(:column) - line = error_value.line - first_column = error_value.column + if error_value.is_a?(Lrama::Lexer::Token) + line = error_value.first_line + first_column = error_value.first_column + last_column = error_value.last_column + value = "'#{error_value.s_value}'" else line = @lexer.line first_column = @lexer.head_column + last_column = @lexer.column + value = error_value.inspect end raise ParseError, <<~ERROR - #{@path}:#{line}:#{first_column}: parse error on value #{error_value.inspect} (#{token_to_str(error_token_id) || '?'}) + #{@path}:#{line}:#{first_column}: parse error on value #{value} (#{token_to_str(error_token_id) || '?'}) #{@text.split("\n")[line - 1]} - #{carrets(first_column)} + #{carrets(first_column, last_column)} ERROR end +def on_action_error(error_message, error_value) + if error_value.is_a?(Lrama::Lexer::Token) + line = error_value.first_line + first_column = error_value.first_column + last_column = error_value.last_column + else + line = @lexer.line + first_column = @lexer.head_column + last_column = @lexer.column + end + + raise ParseError, <<~ERROR + #{@path}:#{line}: #{error_message} + #{@text.split("\n")[line - 1]} + #{carrets(first_column, last_column)} + ERROR 
+end + private def reset_precs @prec_seen = false @code_after_prec = false @@ -439,8 +496,8 @@ def end_c_declaration @lexer.status = :initial @lexer.end_symbol = nil end -def carrets(first_column) - ' ' * (first_column + 1) + '^' * (@lexer.column - first_column) +def carrets(first_column, last_column) + ' ' * (first_column + 1) + '^' * (last_column - first_column) end