lib/rley/parser/earley_parser.rb in rley-0.2.03 vs lib/rley/parser/earley_parser.rb in rley-0.2.04
- old
+ new
@@ -1,6 +1,7 @@
require_relative '../syntax/grammar'
+require_relative 'parse_tracer'
require_relative 'dotted_item'
require_relative 'parsing'
module Rley # This module is used as a namespace
module Parser # This module is used as a namespace
@@ -17,43 +18,69 @@
# A Hash that defines the mapping: dotted item => next dotted item
# In other words, the 'next_mapping' allows to find the dotted item
# after "advancing" the dot
attr_reader(:next_mapping)
-
- # @param aGrammar [Grammar] The grammar of the language
- # (to use by the parser).
+
def initialize(aGrammar)
@grammar = aGrammar
@dotted_items = build_dotted_items(grammar)
@start_mapping = build_start_mapping(dotted_items)
@next_mapping = build_next_mapping(dotted_items)
end
+=begin
+ You can optionally specify a tracing level, for how much output you
+ want to see:
+
+ 0: No output.
+ 1: Show edges from scanner and completer rules (not predictor).
+ 2 (default): Show all edges as they are added to the chart.
+
+ - For each index I{end} in [0, 1, ..., N]:
+ - For each I{edge} s.t. I{edge}.end = I{end}:
+ - If I{edge} is incomplete, and I{edge}.next is not a part
+ of speech:
+ - Apply PredictorRule to I{edge}
+ - If I{edge} is incomplete, and I{edge}.next is a part of
+ speech:
+ - Apply ScannerRule to I{edge}
+ - If I{edge} is complete:
+ - Apply CompleterRule to I{edge}
+ - Return any complete parses in the chart
+=end
+
# Parse a sequence of input tokens.
# @param aTokenSequence [Array] Array of Tokens objects returned by a
# tokenizer/scanner/lexer.
+ # @param aGrammar [Grammar] The grammar of the language
+ # (to use by the parser).
+ # @param aTraceLevel [Fixnum] The specified trace level.
+ # The possible values are:
+ # 0: No trace output (default case)
+ # 1: Show trace of scanning and completion rules
+ # 2: Same as of 1 with the addition of the prediction rules
# @return [Parsing] an object that embeds the parse results.
- def parse(aTokenSequence)
- result = Parsing.new(start_dotted_item, aTokenSequence)
+ def parse(aTokenSequence, aTraceLevel = 0)
+ tracer = ParseTracer.new(aTraceLevel, $stdout, aTokenSequence)
+ result = Parsing.new(start_dotted_item, aTokenSequence, tracer)
last_token_index = aTokenSequence.size
(0..last_token_index).each do |i|
predicted = Set.new
result.chart[i].each do |state|
- if state.complete?
- # parse reached end of production
- completion(result, state, i)
+ if state.complete? # End of production reached?
+ completion(result, state, i, tracer)
else
next_symbol = state.next_symbol
if next_symbol.kind_of?(Syntax::NonTerminal)
unless predicted.include? next_symbol
- prediction(result, state, next_symbol, i)
+ prediction(result, state, next_symbol, i, tracer)
predicted << next_symbol # Avoid repeated predictions
end
elsif i < last_token_index
# Expecting a terminal symbol
- scanning(result, next_symbol, i)
+ scanning(result, next_symbol, i, tracer)
end
end
end
end
@@ -132,22 +159,25 @@
# @param aState [ParseState] current parse state being processed
# @param aNonTerminal [NonTerminal] a non-terminal symbol that
# immediately follows a dot
# (= is expected/predicted by the production rule)
# @param aPosition [Fixnum] position in the input token sequence.
- def prediction(aParsing, aState, aNonTerminal, aPosition)
+ def prediction(aParsing, aState, aNonTerminal, aPosition, aTracer)
+ if aTracer.level > 1
+ puts "Chart[#{aPosition}] Prediction(s) from #{aState}:"
+ end
# Retrieve all start dotted items for productions
# with aNonTerminal as its lhs
items = start_mapping[aNonTerminal]
items.each do |an_item|
- aParsing.push_state(an_item, aPosition, aPosition)
+ aParsing.push_state(an_item, aPosition, aPosition, :prediction)
end
return unless aNonTerminal.nullable?
# Ayock-Horspool trick for nullable rules
next_item = next_mapping[aState.dotted_rule]
- aParsing.push_state(next_item, aState.origin, aPosition)
+ aParsing.push_state(next_item, aState.origin, aPosition, :prediction)
end
# This method is called when a parse state for chart entry at position
# 'pos' expects a terminal as next symbol.
# If the input token matches the terminal symbol then:
@@ -160,25 +190,33 @@
# @param aParsing [Parsing] the object that encapsulates the results
# result of the parsing process
# @param aTerminal [Terminal] a terminal symbol that
# immediately follows a dot
# @param aPosition [Fixnum] position in the input token sequence.
- def scanning(aParsing, aTerminal, aPosition)
+ def scanning(aParsing, aTerminal, aPosition, aTracer)
+ if aTracer.level > 1
+ prefix = "Chart[#{aPosition}] Scanning of terminal "
+ suffix = "#{aTerminal.name}:"
+ puts prefix + suffix
+ end
aParsing.scanning(aTerminal, aPosition) do |item|
next_mapping[item]
end
end
# This method is called when a parse state at chart entry reaches
# the end of a production.
# For every state in chart[aPosition] that is
# complete (i.e. of the form: { dotted_rule: X -> γ •, origin: j}),
# Find states s in chart[j] of the
- # form {dotted_rule: Y -> α • X β, origin: i}
+ # form { dotted_rule: Y -> α • X β, origin: i}
# In other words, rules that predicted the non-terminal X.
# For each s, add to chart[aPosition] a state of the form
# { dotted_rule: Y → α X • β, origin: i})
- def completion(aParsing, aState, aPosition)
+ def completion(aParsing, aState, aPosition, aTracer)
+ if aTracer.level > 1
+ puts "Chart[#{aPosition}] Completion of state #{aState}:"
+ end
aParsing.completion(aState, aPosition) do |item|
next_mapping[item]
end
end
end # class