ll1.rb in ebnf-0.3.2

- old
+ new

@@ -19,16 +19,25 @@
     #
     # @return [Hash{Symbol, String => Symbol}]
     attr_reader :follow
 
     # Terminal table
+    #
     # The list of terminals used in the grammar.
     #
     # @return [Array<String, Symbol>]
     attr_reader :terminals
 
+    # Pass expression
+    #
+    # A Terminal symbol used for skipping whitespace and comments
+    #
+    # @return [Symbol, String]
+    attr_reader :pass
+
     # Start symbol
+    #
     # The rule which starts the grammar
     #
     # @return [Symbol]
     attr_reader :start
 
@@ -67,11 +76,10 @@
             rule.comp = new_rule
           end
 
           @ast += comprehensions
           progress("FF.c") {"(#{ittr}) comprehensions #{comprehensions.length}"}
-          #require 'debugger'; breakpoint
           ittr += 1
         end while !comprehensions.empty?
 
         ittr = 0
         begin
@@ -126,11 +134,10 @@
                 debug("Fo.2.1") {"(#{ittr}) add follow #{comp.first.inspect} from #{comp.sym} to #{ai.sym}"}
                 follows += ai.add_follow(comp.first)
               end
 
               # If there is no comprehension of this rule (meaning, it is a sequence of one non-terminal), then the follows of the non-terminal include the follows of the rule. This handles rules with multiple sequences because it will have a comprehension that includes the last element in the sequence
-              #require 'debugger'; breakpoint if ai.sym == :_predicateObjectList_1 && aj.sym == :_predicateObjectList_7
               if !aj.comp && aj.follow
                 debug("Fo.2.1a") {"(#{ittr}) add follow #{aj.follow.inspect} from #{aj.sym} to #{ai.sym}"}
                 follows += ai.add_follow(aj.follow)
               end
 
@@ -140,11 +147,10 @@
                 follows += ai.add_follow(aj.follow)
               end
             end
 
             # Since the rules are of the form wAiw', and we've handled the case which is just Aiw', this leaves those cases that have rules prior to Ai. This basically says that the follows of a rule are added to the follows of the comprehension of the rule
-            #require 'debugger'; breakpoint if aj.sym == :_predicateObjectList_6 && aj.follow
             if aj.comp && aj.follow
               debug("Fo.2.3") {"(#{ittr}) add follow #{aj.follow.inspect} from #{aj.sym} to #{aj.comp.sym}"}
               follows += aj.comp.add_follow(aj.follow)
             end
           end
@@ -173,15 +179,30 @@
         select(&:follow).
         inject({}) {|memo, r|
           memo[r.sym] = r.follow if r.follow
           memo
         }
-      @terminals = ast.map do |r|
-        (r.first || []) + (r.follow || [])
-      end.flatten.uniq
+      @terminals = ast.map {|r| Array(r.first) + Array(r.follow)}.flatten.uniq
       @terminals = (@terminals - [:_eps, :_eof]).sort_by{|t| t.to_s.sub(/^_/, '')}
 
+      # FIXME: assumes that this is a (seq :PASS), or similar
+      if pass = ast.detect {|r| r.pass?}
+        @pass = pass.expr.last
+      end
+
+      # If a generated terminal is found, this indicates an error, as this version does not automatically generate regular expressions for automatic terminals
+      @terminals.
+        select {|t| t.to_s.start_with?("_")}.
+        reject {|t| t.to_s.start_with?("_pass_")}.  # Concession to backwards compatibility
+        each do |term|
+
+        error("build_tables",
+              "terminal #{term} is automatically generated; " +
+              "regular expressions are not yet generated and parsing " +
+              "is not supported")
+      end
+
       @branch = {}
       @already = []
       @agenda = []
       do_production(@start)
       while !@agenda.empty?
@@ -254,25 +275,36 @@
         error("No record of what token #{lhs} can start with") unless rule.first
         return
       end
 
       if rule.alt?
+        # A First/Follow conflict appears when _eps is in the first
+        # of one rule and there is a token in the first and
+        # follow of the same rule
+        if rule.first.include?(:_eps) && !(overlap = ((rule.first & rule.follow) - [:eps])).empty?
+          error("First/Follow Conflict: #{overlap.first.inspect} is both first and follow of #{rule.sym}")
+        end
+
         # Add entries for each alternative, based on the alternative's first/seq
         rule.expr[1..-1].each do |prod|
           prod_rule = find_rule(prod)
           debug("  Alt", prod)
+
           @agenda << prod unless @already.include?(prod) || @agenda.include?(prod)
           if prod == :_empty
             debug("    empty")
             # Skip empty, rules added bellow for follows
           elsif prod_rule.nil? || prod_rule.first.nil?
             debug("    no first =>", prod)
             branchDict[prod] = [prod]
           else
             prod_rule.first.reject{|f| f == :_eps}.each do |f|
+              # A First/First conflict appears when there are two rules having
+              # the same first, so the parser can't know which one to choose.
               if branchDict.has_key?(f)
-                error("First/First Conflict: #{f} is also the condition for #{branchDict[f]}")
+                error("First/First Conflict: #{f.inspect} is also the condition for #{branchDict[f].first}")
               end
+
               debug("   alt") {"[#{f}] => #{prod}"}
               branchDict[f] = [prod]
             end
           end
         end