parser.y in lrama-0.5.12 vs parser.y in lrama-0.6.0

- old (lrama-0.5.12)
+ new (lrama-0.6.0)

@@ -27,10 +27,11 @@
   bison_declarations: /* empty */ { result = "" }
                     | bison_declarations bison_declaration
 
   bison_declaration: grammar_declaration
+                   | rule_declaration
                    | "%expect" INTEGER { @grammar.expect = val[1] }
                    | "%define" variable value
                    | "%param" params
                    | "%lex-param" params
                      {
@@ -200,10 +201,89 @@
   token_declaration_list: token_declaration { result = [val[0]] }
                         | token_declaration_list token_declaration { result = val[0].append(val[1]) }
 
   token_declaration: id int_opt alias { result = val }
 
+  rule_declaration: "%rule" IDENTIFIER "(" rule_args ")" ":" rule_rhs_list
+                      {
+                        builder = Grammar::ParameterizingRuleBuilder.new(val[1].s_value, val[3], val[6])
+                        @grammar.add_parameterizing_rule_builder(builder)
+                      }
+
+  rule_args: IDENTIFIER { result = [val[0]] }
+           | rule_args "," IDENTIFIER { result = val[0].append(val[2]) }
+
+  rule_rhs_list: rule_rhs
+                   {
+                     builder = val[0]
+                     result = [builder]
+                   }
+               | rule_rhs_list "|" rule_rhs
+                   {
+                     builder = val[2]
+                     result = val[0].append(builder)
+                   }
+
+  rule_rhs: /* empty */
+              {
+                reset_precs
+                result = Grammar::ParameterizingRuleRhsBuilder.new
+              }
+          | "%empty"
+              {
+                reset_precs
+                result = Grammar::ParameterizingRuleRhsBuilder.new
+              }
+          | rule_rhs symbol named_ref_opt
+              {
+                token = val[1]
+                token.alias_name = val[2]
+                builder = val[0]
+                builder.symbols << token
+                result = builder
+              }
+          | rule_rhs IDENTIFIER parameterizing_suffix
+              {
+                builder = val[0]
+                builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], location: @lexer.location, args: [val[1]])
+                result = builder
+              }
+          | rule_rhs IDENTIFIER "(" parameterizing_args ")"
+              {
+                builder = val[0]
+                builder.symbols << Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[3])
+                result = builder
+              }
+          | rule_rhs "{"
+              {
+                if @prec_seen
+                  on_action_error("multiple User_code after %prec", val[0]) if @code_after_prec
+                  @code_after_prec = true
+                end
+                begin_c_declaration("}")
+              }
+            C_DECLARATION
+              {
+                end_c_declaration
+              }
+            "}" named_ref_opt
+              {
+                user_code = val[3]
+                user_code.alias_name = val[6]
+                builder = val[0]
+                builder.user_code = user_code
+                result = builder
+              }
+          | rule_rhs "%prec" symbol
+              {
+                sym = @grammar.find_symbol_by_id!(val[2])
+                @prec_seen = true
+                builder = val[0]
+                builder.precedence_sym = sym
+                result = builder
+              }
+
   int_opt: # empty
          | INTEGER
 
   alias: # empty
        | STRING # TODO: change this to string_as_id
@@ -324,22 +404,22 @@
         token.alias_name = val[2]
         builder = val[0]
         builder.add_rhs(token)
         result = builder
       }
-    | rhs IDENTIFIER parameterizing_suffix tag_opt
+    | rhs symbol parameterizing_suffix tag_opt
       {
-        token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[2], location: @lexer.location, args: [val[1]])
+        token = Lrama::Lexer::Token::InstantiateRule.new(s_value: val[2], location: @lexer.location, args: [val[1]])
         builder = val[0]
         builder.add_rhs(token)
         builder.lhs_tag = val[3]
         builder.line = val[1].first_line
         result = builder
       }
     | rhs IDENTIFIER "(" parameterizing_args ")" tag_opt
       {
-        token = Lrama::Lexer::Token::Parameterizing.new(s_value: val[1].s_value, location: @lexer.location, args: val[3])
+        token = Lrama::Lexer::Token::InstantiateRule.new(s_value: val[1].s_value, location: @lexer.location, args: val[3])
         builder = val[0]
         builder.add_rhs(token)
         builder.lhs_tag = val[5]
         builder.line = val[1].first_line
         result = builder
@@ -419,20 +499,19 @@
 ---- inner
 
 include Lrama::Report::Duration
 
 def initialize(text, path, debug = false)
-  @text = text
-  @path = path
+  @grammar_file = Lrama::Lexer::GrammarFile.new(path, text)
   @yydebug = debug
   @rule_counter = Lrama::Grammar::Counter.new(0)
   @midrule_action_counter = Lrama::Grammar::Counter.new(1)
 end
 
 def parse
   report_duration(:parse) do
-    @lexer = Lrama::Lexer.new(@text)
+    @lexer = Lrama::Lexer.new(@grammar_file)
     @grammar = Lrama::Grammar.new(@rule_counter)
     @precedence_number = 0
     reset_precs
     do_parse
     @grammar.prepare
@@ -445,44 +524,30 @@
   @lexer.next_token
 end
 
 def on_error(error_token_id, error_value, value_stack)
   if error_value.is_a?(Lrama::Lexer::Token)
-    line = error_value.first_line
-    first_column = error_value.first_column
-    last_column = error_value.last_column
+    location = error_value.location
     value = "'#{error_value.s_value}'"
   else
-    line = @lexer.line
-    first_column = @lexer.head_column
-    last_column = @lexer.column
+    location = @lexer.location
     value = error_value.inspect
   end
 
-  raise ParseError, <<~ERROR
-    #{@path}:#{line}:#{first_column}: parse error on value #{value} (#{token_to_str(error_token_id) || '?'})
-    #{@text.split("\n")[line - 1]}
-    #{carrets(first_column, last_column)}
-  ERROR
+  error_message = "parse error on value #{value} (#{token_to_str(error_token_id) || '?'})"
+
+  raise_parse_error(error_message, location)
 end
 
 def on_action_error(error_message, error_value)
   if error_value.is_a?(Lrama::Lexer::Token)
-    line = error_value.first_line
-    first_column = error_value.first_column
-    last_column = error_value.last_column
+    location = error_value.location
   else
-    line = @lexer.line
-    first_column = @lexer.head_column
-    last_column = @lexer.column
+    location = @lexer.location
   end
 
-  raise ParseError, <<~ERROR
-    #{@path}:#{line}: #{error_message}
-    #{@text.split("\n")[line - 1]}
-    #{carrets(first_column, last_column)}
-  ERROR
+  raise_parse_error(error_message, location)
 end
 
 private
 
 def reset_precs
@@ -498,8 +563,8 @@
 def end_c_declaration
   @lexer.status = :initial
   @lexer.end_symbol = nil
 end
 
-def carrets(first_column, last_column)
-  ' ' * (first_column + 1) + '^' * (last_column - first_column)
+def raise_parse_error(error_message, location)
+  raise ParseError, location.generate_error_message(error_message)
 end
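
For context, here is a minimal sketch of the kind of grammar input the new productions accept: a "%rule" declaration in the declarations section defines a parameterizing rule, and a right-hand side after "%%" instantiates it either as name(args) or with a ?/*/+ suffix. The rule and token names below are illustrative only (not taken from the diff), and the fragment omits the rest of a complete .y file:

  %token KEYWORD
  %token NUMBER

  %rule defined_option(X): %empty
                         | X

  %%

  stmt: defined_option(KEYWORD)  /* matched by: rhs IDENTIFIER "(" parameterizing_args ")" tag_opt */
      | NUMBER?                  /* matched by: rhs symbol parameterizing_suffix tag_opt */
      ;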
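
The error-reporting hunks replace the two inline heredocs with a single raise_parse_error helper that delegates message formatting to a location object. Based only on the code removed above, the extracted formatting plausibly has the shape sketched below; GrammarFileSketch, LocationSketch, and the #path and #lines accessors are assumptions for illustration, not lrama's actual Lrama::Lexer::Location implementation:

  # Hypothetical stand-ins for Lrama::Lexer::GrammarFile and Location,
  # reconstructed from the heredocs and the carrets helper removed above.
  GrammarFileSketch = Struct.new(:path, :text) do
    def lines
      text.split("\n")
    end
  end

  class LocationSketch
    def initialize(grammar_file:, first_line:, first_column:, last_column:)
      @grammar_file = grammar_file
      @first_line = first_line
      @first_column = first_column
      @last_column = last_column
    end

    # Builds "path:line:column: message", the offending source line,
    # and a caret underline, as the old on_error heredoc did inline.
    def generate_error_message(error_message)
      <<~ERROR
        #{@grammar_file.path}:#{@first_line}:#{@first_column}: #{error_message}
        #{@grammar_file.lines[@first_line - 1]}
        #{carrets}
      ERROR
    end

    private

    # Mirrors the Parser#carrets helper deleted in the last hunk.
    def carrets
      ' ' * (@first_column + 1) + '^' * (@last_column - @first_column)
    end
  end

  file = GrammarFileSketch.new("parse.y", "%expect invalid\n")
  location = LocationSketch.new(grammar_file: file, first_line: 1, first_column: 8, last_column: 15)
  puts location.generate_error_message("parse error on value 'invalid' (IDENTIFIER)")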