lib/ebnf/ll1.rb in ebnf-0.3.1 vs lib/ebnf/ll1.rb in ebnf-0.3.2

- old
+ new

@@ -19,16 +19,25 @@ # # @return [Hash{Symbol, String => Symbol}] attr_reader :follow # Terminal table + # # The list of terminals used in the grammar. # # @return [Array<String, Symbol>] attr_reader :terminals + # Pass expression + # + # A Terminal symbol used for skipping whitespace and comments + # + # @return [Symbol, String] + attr_reader :pass + # Start symbol + # # The rule which starts the grammar # # @return [Symbol] attr_reader :start @@ -67,11 +76,10 @@ rule.comp = new_rule end @ast += comprehensions progress("FF.c") {"(#{ittr}) comprehensions #{comprehensions.length}"} - #require 'debugger'; breakpoint ittr += 1 end while !comprehensions.empty? ittr = 0 begin @@ -126,11 +134,10 @@ debug("Fo.2.1") {"(#{ittr}) add follow #{comp.first.inspect} from #{comp.sym} to #{ai.sym}"} follows += ai.add_follow(comp.first) end # If there is no comprehension of this rule (meaning, it is a sequence of one non-terminal), then the follows of the non-terminal include the follows of the rule. This handles rules with multiple sequences because it will have a comprehension that includes the last element in the sequence - #require 'debugger'; breakpoint if ai.sym == :_predicateObjectList_1 && aj.sym == :_predicateObjectList_7 if !aj.comp && aj.follow debug("Fo.2.1a") {"(#{ittr}) add follow #{aj.follow.inspect} from #{aj.sym} to #{ai.sym}"} follows += ai.add_follow(aj.follow) end @@ -140,11 +147,10 @@ follows += ai.add_follow(aj.follow) end end # Since the rules are of the form wAiw', and we've handled the case which is just Aiw', this leaves those cases that have rules prior to Ai. This basically says that the follows of a rule are added to the follows of the comprehension of the rule - #require 'debugger'; breakpoint if aj.sym == :_predicateObjectList_6 && aj.follow if aj.comp && aj.follow debug("Fo.2.3") {"(#{ittr}) add follow #{aj.follow.inspect} from #{aj.sym} to #{aj.comp.sym}"} follows += aj.comp.add_follow(aj.follow) end end @@ -173,15 +179,30 @@ select(&:follow). inject({}) {|memo, r| memo[r.sym] = r.follow if r.follow memo } - @terminals = ast.map do |r| - (r.first || []) + (r.follow || []) - end.flatten.uniq + @terminals = ast.map {|r| Array(r.first) + Array(r.follow)}.flatten.uniq @terminals = (@terminals - [:_eps, :_eof]).sort_by{|t| t.to_s.sub(/^_/, '')} + # FIXME: assumes that this is a (seq :PASS), or similar + if pass = ast.detect {|r| r.pass?} + @pass = pass.expr.last + end + + # If a generated terminal is found, this indicates an error, as this version does not automatically generate regular expressions for automatic terminals + @terminals. + select {|t| t.to_s.start_with?("_")}. + reject {|t| t.to_s.start_with?("_pass_")}. # Concession to backwards compatibility + each do |term| + + error("build_tables", + "terminal #{term} is automatically generated; " + + "regular expressions are not yet generated and parsing " + + "is not supported") + end + @branch = {} @already = [] @agenda = [] do_production(@start) while !@agenda.empty? @@ -254,25 +275,36 @@ error("No record of what token #{lhs} can start with") unless rule.first return end if rule.alt? + # A First/Follow conflict appears when _eps is in the first + # of one rule and there is a token in the first and + # follow of the same rule + if rule.first.include?(:_eps) && !(overlap = ((rule.first & rule.follow) - [:eps])).empty? + error("First/Follow Conflict: #{overlap.first.inspect} is both first and follow of #{rule.sym}") + end + # Add entries for each alternative, based on the alternative's first/seq rule.expr[1..-1].each do |prod| prod_rule = find_rule(prod) debug(" Alt", prod) + @agenda << prod unless @already.include?(prod) || @agenda.include?(prod) if prod == :_empty debug(" empty") # Skip empty, rules added bellow for follows elsif prod_rule.nil? || prod_rule.first.nil? debug(" no first =>", prod) branchDict[prod] = [prod] else prod_rule.first.reject{|f| f == :_eps}.each do |f| + # A First/First conflict appears when there are two rules having + # the same first, so the parser can't know which one to choose. if branchDict.has_key?(f) - error("First/First Conflict: #{f} is also the condition for #{branchDict[f]}") + error("First/First Conflict: #{f.inspect} is also the condition for #{branchDict[f].first}") end + debug(" alt") {"[#{f}] => #{prod}"} branchDict[f] = [prod] end end end