lib/ebnf/ll1.rb in ebnf-0.3.1 vs lib/ebnf/ll1.rb in ebnf-0.3.2
- old
+ new
@@ -19,16 +19,25 @@
#
# @return [Hash{Symbol, String => Symbol}]
attr_reader :follow
# Terminal table
+ #
# The list of terminals used in the grammar.
#
# @return [Array<String, Symbol>]
attr_reader :terminals
+ # Pass expression
+ #
+ # A Terminal symbol used for skipping whitespace and comments
+ #
+ # @return [Symbol, String]
+ attr_reader :pass
+
# Start symbol
+ #
# The rule which starts the grammar
#
# @return [Symbol]
attr_reader :start
@@ -67,11 +76,10 @@
rule.comp = new_rule
end
@ast += comprehensions
progress("FF.c") {"(#{ittr}) comprehensions #{comprehensions.length}"}
- #require 'debugger'; breakpoint
ittr += 1
end while !comprehensions.empty?
ittr = 0
begin
@@ -126,11 +134,10 @@
debug("Fo.2.1") {"(#{ittr}) add follow #{comp.first.inspect} from #{comp.sym} to #{ai.sym}"}
follows += ai.add_follow(comp.first)
end
# If there is no comprehension of this rule (meaning, it is a sequence of one non-terminal), then the follows of the non-terminal include the follows of the rule. This handles rules with multiple sequences because it will have a comprehension that includes the last element in the sequence
- #require 'debugger'; breakpoint if ai.sym == :_predicateObjectList_1 && aj.sym == :_predicateObjectList_7
if !aj.comp && aj.follow
debug("Fo.2.1a") {"(#{ittr}) add follow #{aj.follow.inspect} from #{aj.sym} to #{ai.sym}"}
follows += ai.add_follow(aj.follow)
end
@@ -140,11 +147,10 @@
follows += ai.add_follow(aj.follow)
end
end
# Since the rules are of the form wAiw', and we've handled the case which is just Aiw', this leaves those cases that have rules prior to Ai. This basically says that the follows of a rule are added to the follows of the comprehension of the rule
- #require 'debugger'; breakpoint if aj.sym == :_predicateObjectList_6 && aj.follow
if aj.comp && aj.follow
debug("Fo.2.3") {"(#{ittr}) add follow #{aj.follow.inspect} from #{aj.sym} to #{aj.comp.sym}"}
follows += aj.comp.add_follow(aj.follow)
end
end
@@ -173,15 +179,30 @@
select(&:follow).
inject({}) {|memo, r|
memo[r.sym] = r.follow if r.follow
memo
}
- @terminals = ast.map do |r|
- (r.first || []) + (r.follow || [])
- end.flatten.uniq
+ @terminals = ast.map {|r| Array(r.first) + Array(r.follow)}.flatten.uniq
@terminals = (@terminals - [:_eps, :_eof]).sort_by{|t| t.to_s.sub(/^_/, '')}
+ # FIXME: assumes that this is a (seq :PASS), or similar
+ if pass = ast.detect {|r| r.pass?}
+ @pass = pass.expr.last
+ end
+
+ # If a generated terminal is found, this indicates an error, as this version does not automatically generate regular expressions for automatic terminals
+ @terminals.
+ select {|t| t.to_s.start_with?("_")}.
+ reject {|t| t.to_s.start_with?("_pass_")}. # Concession to backwards compatibility
+ each do |term|
+
+ error("build_tables",
+ "terminal #{term} is automatically generated; " +
+ "regular expressions are not yet generated and parsing " +
+ "is not supported")
+ end
+
@branch = {}
@already = []
@agenda = []
do_production(@start)
while !@agenda.empty?
@@ -254,25 +275,36 @@
error("No record of what token #{lhs} can start with") unless rule.first
return
end
if rule.alt?
+ # A First/Follow conflict appears when _eps is in the first
+ # of one rule and there is a token in the first and
+ # follow of the same rule
+ if rule.first.include?(:_eps) && !(overlap = ((rule.first & rule.follow) - [:eps])).empty?
+ error("First/Follow Conflict: #{overlap.first.inspect} is both first and follow of #{rule.sym}")
+ end
+
# Add entries for each alternative, based on the alternative's first/seq
rule.expr[1..-1].each do |prod|
prod_rule = find_rule(prod)
debug(" Alt", prod)
+
@agenda << prod unless @already.include?(prod) || @agenda.include?(prod)
if prod == :_empty
debug(" empty")
# Skip empty, rules added bellow for follows
elsif prod_rule.nil? || prod_rule.first.nil?
debug(" no first =>", prod)
branchDict[prod] = [prod]
else
prod_rule.first.reject{|f| f == :_eps}.each do |f|
+ # A First/First conflict appears when there are two rules having
+ # the same first, so the parser can't know which one to choose.
if branchDict.has_key?(f)
- error("First/First Conflict: #{f} is also the condition for #{branchDict[f]}")
+ error("First/First Conflict: #{f.inspect} is also the condition for #{branchDict[f].first}")
end
+
debug(" alt") {"[#{f}] => #{prod}"}
branchDict[f] = [prod]
end
end
end