spec/rley/parser/earley_parser_spec.rb in rley-0.0.04 vs spec/rley/parser/earley_parser_spec.rb in rley-0.0.05
- old
+ new
@@ -7,265 +7,301 @@
# Load the class under test
require_relative '../../../lib/rley/parser/earley_parser'
module Rley # Open this namespace to avoid module qualifier prefixes
module Parser # Open this namespace to avoid module qualifier prefixes
-
- describe EarleyParser do
+ describe EarleyParser do
=begin
- let(:kw_true) { Syntax::VerbatimSymbol('true') }
- let(:kw_false) { Syntax::VerbatimSymbol('false') }
- let(:kw_null) { Syntax::VerbatimSymbol('null') }
- let(:number) do
- number_pattern = /[-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/
- Syntax::Literal('number', number_pattern)
- end
- let(:string) do
- string_pattern = /"([^\\"]|\\.)*"/
- Syntax::Literal('string', string_pattern)
- end
- let(:lbracket) { Syntax::VerbatimSymbol('[') }
- let(:rbracket) { Syntax::VerbatimSymbol(']') }
- let(:comma) { Syntax::VerbatimSymbol(',') }
- let(:array) { Syntax::NonTerminal('Array') }
- let(:object) { Syntax::NonTerminal('Object') }
+ let(:kw_true) { Syntax::VerbatimSymbol.new('true') }
+ let(:kw_false) { Syntax::VerbatimSymbol.new('false') }
+ let(:kw_null) { Syntax::VerbatimSymbol.new('null') }
+ let(:number) do
+ number_pattern = /[-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/
+ Syntax::Literal.new('number', number_pattern)
+ end
+ let(:string) do
+ string_pattern = /"([^\\"]|\\.)*"/
+ Syntax::Literal('string', string_pattern)
+ end
+ let(:lbracket) { Syntax::VerbatimSymbol.new('[') }
+ let(:rbracket) { Syntax::VerbatimSymbol.new(']') }
+ let(:comma) { Syntax::VerbatimSymbol.new(',') }
+ let(:array) { Syntax::NonTerminal.new('Array') }
+ let(:object) { Syntax::NonTerminal.new('Object') }
- let(:array_prod) do
- Production.new(array, )
- end
+ let(:array_prod) do
+ Production.new(array, )
+ end
=end
- # Grammar 1: A very simple language
- # S ::= A.
- # A ::= "a" A "c".
- # A ::= "b".
- let(:nt_S) { Syntax::NonTerminal.new('S') }
- let(:nt_A) { Syntax::NonTerminal.new('A') }
- let(:a_) { Syntax::VerbatimSymbol.new('a') }
- let(:b_) { Syntax::VerbatimSymbol.new('b') }
- let(:c_) { Syntax::VerbatimSymbol.new('c') }
- let(:prod_S) { Syntax::Production.new(nt_S, [nt_A]) }
- let(:prod_A1) { Syntax::Production.new(nt_A, [a_, nt_A, c_]) }
- let(:prod_A2) { Syntax::Production.new(nt_A, [b_]) }
- let(:grammar_abc) { Syntax::Grammar.new([prod_S, prod_A1, prod_A2]) }
+ # Grammar 1: A very simple language
+ # S ::= A.
+ # A ::= "a" A "c".
+ # A ::= "b".
+ # Let's create the grammar piece by piece
+ let(:nt_S) { Syntax::NonTerminal.new('S') }
+ let(:nt_A) { Syntax::NonTerminal.new('A') }
+ let(:a_) { Syntax::VerbatimSymbol.new('a') }
+ let(:b_) { Syntax::VerbatimSymbol.new('b') }
+ let(:c_) { Syntax::VerbatimSymbol.new('c') }
+ let(:prod_S) { Syntax::Production.new(nt_S, [nt_A]) }
+ let(:prod_A1) { Syntax::Production.new(nt_A, [a_, nt_A, c_]) }
+ let(:prod_A2) { Syntax::Production.new(nt_A, [b_]) }
+ let(:grammar_abc) { Syntax::Grammar.new([prod_S, prod_A1, prod_A2]) }
- # Helper method that mimicks the output of a tokenizer
- # for the language specified by gramma_abc
- def grm1_tokens()
- tokens = [
- Token.new('a', a_),
- Token.new('a', a_),
- Token.new('b', b_),
- Token.new('c', c_),
- Token.new('c', c_)
- ]
+ # Helper method that mimicks the output of a tokenizer
+ # for the language specified by grammar_abc
+ def grm1_tokens()
+ tokens = [
+ Token.new('a', a_),
+ Token.new('a', a_),
+ Token.new('b', b_),
+ Token.new('c', c_),
+ Token.new('c', c_)
+ ]
- return tokens
- end
-
-
- # Default instantiation rule
- subject { EarleyParser.new(grammar_abc) }
-
- context 'Initialization:' do
- it 'should be created with a grammar' do
- expect { EarleyParser.new(grammar_abc) }.not_to raise_error
+ return tokens
end
- it 'should know its grammar' do
- expect(subject.grammar).to eq(grammar_abc)
- end
- it 'should know its dotted items' do
- expect(subject.dotted_items.size).to eq(8)
+ # Grammar 2: A simple arithmetic expression language
+ # E ::= S.
+ # S ::= S "+" M.
+ # S ::= M.
+ # M ::= M "*" M.
+ # M ::= T.
+ # T ::= an integer number token.
+ # Let's create the grammar piece by piece
+ let(:nt_E) { Syntax::NonTerminal.new('E') }
+ let(:nt_M) { Syntax::NonTerminal.new('M') }
+ let(:nt_T) { Syntax::NonTerminal.new('T') }
+ let(:plus) { Syntax::VerbatimSymbol.new('+') }
+ let(:star) { Syntax::VerbatimSymbol.new('*') }
+ let(:integer) do
+ integer_pattern = /[-+]?[0-9]+/ # Decimal notation
+ Syntax::Literal.new('integer', integer_pattern)
end
-
- it 'should have its start mapping initialized' do
- expect(subject.start_mapping.size).to eq(2)
-
- start_items_S = subject.start_mapping[nt_S]
- expect(start_items_S.size).to eq(1)
- expect(start_items_S[0].production).to eq(prod_S)
-
- start_items_A = subject.start_mapping[nt_A]
- expect(start_items_A.size).to eq(2)
-
- # Assuming that dotted_items are created in same order
- # than production in grammar.
- expect(start_items_A[0].production).to eq(prod_A1)
- expect(start_items_A[1].production).to eq(prod_A2)
+ let(:prod_E) { Syntax::Production.new(nt_E, [nt_S]) }
+ let(:prod_S1) { Syntax::Production.new(nt_S, [nt_S, plus, nt_M]) }
+ let(:prod_S2) { Syntax::Production.new(nt_S, [nt_M]) }
+ let(:prod_M1) { Syntax::Production.new(nt_M, [nt_M, star, nt_M]) }
+ let(:prod_M2) { Syntax::Production.new(nt_M, [nt_T]) }
+ let(:prod_T) { Syntax::Production.new(nt_T, [integer]) }
+ let(:grammar_expr) do
+ all_prods = [prod_E, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
+ Syntax::Grammar.new(all_prods)
end
- it 'should have its next mapping initialized' do
- expect(subject.next_mapping.size).to eq(5)
- end
- end # context
+ # Helper method that mimicks the output of a tokenizer
+ # for the language specified by grammar_expr
+ def grm2_tokens()
+ tokens = [
+ Token.new('2', integer),
+ Token.new('+', plus),
+ Token.new('3', integer),
+ Token.new('*', star),
+ Token.new('4', integer)
+ ]
- context 'Parsing: ' do
- # Helper method. Compare the data from the parse state
- # with values from expectation hash.
- def compare_state(aState, expectations)
- expect(aState.origin).to eq(expectations[:origin])
- dotted_item = aState.dotted_rule
- expect(dotted_item.production).to eq(expectations[:production])
- expect(dotted_item.position).to eq(expectations[:dot])
+ return tokens
end
- it 'should parse a valid simple input' do
- parse_result = subject.parse(grm1_tokens)
- expect(parse_result.success?).to eq(true)
- ######################
- state_set_0 = parse_result.chart[0]
- # Expectation chart[0]:
- # S -> . A, 0 # start rule
- # A -> . "a" A "c", 0 # predict from 0
- # A -> . "b", 0 # predict from 0
- expectations = { origin: 0, production: prod_S, dot: 0 }
- compare_state(state_set_0.states[0], expectations)
+ # Default instantiation rule
+ subject { EarleyParser.new(grammar_abc) }
- expectations = { origin: 0, production: prod_A1, dot: 0 }
- compare_state(state_set_0.states[1], expectations)
+ context 'Initialization:' do
+ it 'should be created with a grammar' do
+ expect { EarleyParser.new(grammar_abc) }.not_to raise_error
+ expect { EarleyParser.new(grammar_expr) }.not_to raise_error
+ end
- expectations = { origin: 0, production: prod_A2, dot: 0 }
- compare_state(state_set_0.states[2], expectations)
+ it 'should know its grammar' do
+ expect(subject.grammar).to eq(grammar_abc)
+ end
- ######################
- state_set_1 = parse_result.chart[1]
- expect(state_set_1.states.size).to eq(3)
- # Expectation chart[1]:
- # 0: A -> "a" . A "c", 0 # scan from S(0) 1
- # 1: A -> . "a" A "c", 1 # predict from 0
- # 2: A -> . "b", 1 # predict from 0
- expectations = { origin: 0, production: prod_A1, dot: 1 }
- compare_state(state_set_1.states[0], expectations)
+ it 'should know its dotted items' do
+ expect(subject.dotted_items.size).to eq(8)
+ end
- expectations = { origin: 1, production: prod_A1, dot: 0 }
- compare_state(state_set_1.states[1], expectations)
+ it 'should have its start mapping initialized' do
+ expect(subject.start_mapping.size).to eq(2)
- expectations = { origin: 1, production: prod_A2, dot: 0 }
- compare_state(state_set_1.states[2], expectations)
+ start_items_S = subject.start_mapping[nt_S]
+ expect(start_items_S.size).to eq(1)
+ expect(start_items_S[0].production).to eq(prod_S)
- ######################
- state_set_2 = parse_result.chart[2]
- expect(state_set_2.states.size).to eq(3)
- # Expectation chart[2]:
- # 0: A -> "a" . A "c", 1 # scan from S(0) 1
- # 1: A -> . "a" A "c", 2 # predict from 0
- # 2: A -> . "b", 2 # predict from 0
- expectations = { origin: 1, production: prod_A1, dot: 1 }
- compare_state(state_set_2.states[0], expectations)
+ start_items_A = subject.start_mapping[nt_A]
+ expect(start_items_A.size).to eq(2)
- expectations = { origin: 2, production: prod_A1, dot: 0 }
- compare_state(state_set_2.states[1], expectations)
+ # Assuming that dotted_items are created in same order
+ # than production in grammar.
+ expect(start_items_A[0].production).to eq(prod_A1)
+ expect(start_items_A[1].production).to eq(prod_A2)
+ end
- expectations = { origin: 2, production: prod_A2, dot: 0 }
- compare_state(state_set_2.states[2], expectations)
+ it 'should have its next mapping initialized' do
+ expect(subject.next_mapping.size).to eq(5)
+ end
+ end # context
- ######################
- state_set_3 = parse_result.chart[3]
- expect(state_set_3.states.size).to eq(2)
- # Expectation chart[3]:
- # 0: A -> "b" ., 2 # scan from S(2) 2
- # 1: A -> "a" A . "c", 1 # complete from 0 and S(2) 0
- expectations = { origin: 2, production: prod_A2, dot: -1 }
- compare_state(state_set_3.states[0], expectations)
+ context 'Parsing: ' do
+ # Helper method. Compare the data from the parse state
+ # with values from expectation hash.
+ def compare_state(aState, expectations)
+ expect(aState.origin).to eq(expectations[:origin])
+ dotted_item = aState.dotted_rule
+ expect(dotted_item.production).to eq(expectations[:production])
+ expect(dotted_item.position).to eq(expectations[:dot])
+ end
- expectations = { origin: 1, production: prod_A1, dot: 2 }
- compare_state(state_set_3.states[1], expectations)
+ # Helper method. Compare the data from all the parse states
+ # of a given StateSet with an array of expectation hashes.
+ def compare_state_set(aStateSet, expectations)
+ (0...expectations.size).each do |i|
+ compare_state(aStateSet.states[i], expectations[i])
+ end
+ end
- ######################
- state_set_4 = parse_result.chart[4]
- expect(state_set_4.states.size).to eq(2)
- # Expectation chart[4]:
- # 0: A -> "a" A "c" ., 1 # scan from S(3) 1
- # 1: A -> "a" A . "c", 0 # complete from 0 and S(1) 0
- expectations = { origin: 1, production: prod_A1, dot: -1 }
- compare_state(state_set_4.states[0], expectations)
+ it 'should parse a valid simple input' do
+ parse_result = subject.parse(grm1_tokens)
+ expect(parse_result.success?).to eq(true)
- expectations = { origin: 0, production: prod_A1, dot: 2 }
- compare_state(state_set_4.states[1], expectations)
+ ######################
+ # Expectation chart[0]:
+ # S -> . A, 0 # start rule
+ # A -> . "a" A "c", 0 # predict from 0
+ # A -> . "b", 0 # predict from 0
+ expectations = [
+ { origin: 0, production: prod_S, dot: 0 },
+ { origin: 0, production: prod_A1, dot: 0 },
+ { origin: 0, production: prod_A2, dot: 0 }
+ ]
+ compare_state_set(parse_result.chart[0], expectations)
- ######################
- state_set_5 = parse_result.chart[5]
- expect(state_set_5.states.size).to eq(2)
- # Expectation chart[5]:
- # 0: A -> "a" A "c" ., 0 # scan from S(4) 1
- # 1: S -> A ., 0 # complete from 0 and S(0) 0
- expectations = { origin: 0, production: prod_A1, dot: -1 }
- compare_state(state_set_5.states[0], expectations)
+ ######################
+ state_set_1 = parse_result.chart[1]
+ expect(state_set_1.states.size).to eq(3)
+ # Expectation chart[1]:
+ # 0: A -> "a" . A "c", 0 # scan from S(0) 1
+ # 1: A -> . "a" A "c", 1 # predict from 0
+ # 2: A -> . "b", 1 # predict from 0
+ expectations = [
+ { origin: 0, production: prod_A1, dot: 1 },
+ { origin: 1, production: prod_A1, dot: 0 },
+ { origin: 1, production: prod_A2, dot: 0 }
+ ]
+ compare_state_set(state_set_1, expectations)
- expectations = { origin: 0, production: prod_S, dot: -1 }
- compare_state(state_set_5.states[1], expectations)
- end
+ ######################
+ state_set_2 = parse_result.chart[2]
+ expect(state_set_2.states.size).to eq(3)
+ # Expectation chart[2]:
+ # 0: A -> "a" . A "c", 1 # scan from S(0) 1
+ # 1: A -> . "a" A "c", 2 # predict from 0
+ # 2: A -> . "b", 2 # predict from 0
+ expectations = [
+ { origin: 1, production: prod_A1, dot: 1 },
+ { origin: 2, production: prod_A1, dot: 0 },
+ { origin: 2, production: prod_A2, dot: 0 }
+ ]
+ compare_state_set(state_set_2, expectations)
- it 'should parse an invalid simple input' do
- # Parse an erroneous input (b is missing)
- wrong = [
- Token.new('a', a_),
- Token.new('a', a_),
- Token.new('c', c_),
- Token.new('c', c_)
- ]
- parse_result = subject.parse(wrong)
- expect(parse_result.success?).to eq(false)
+ ######################
+ state_set_3 = parse_result.chart[3]
+ expect(state_set_3.states.size).to eq(2)
+ # Expectation chart[3]:
+ # 0: A -> "b" ., 2 # scan from S(2) 2
+ # 1: A -> "a" A . "c", 1 # complete from 0 and S(2) 0
+ expectations = [
+ { origin: 2, production: prod_A2, dot: -1 },
+ { origin: 1, production: prod_A1, dot: 2 }
+ ]
+ compare_state_set(state_set_3, expectations)
- ###################### S(0) == . a a c c
- state_set_0 = parse_result.chart[0]
- # Expectation chart[0]:
- # S -> . A, 0 # start rule
- # A -> . "a" A "c", 0
- # A -> . "b", 0
- expectations = { origin: 0, production: prod_S, dot: 0 }
- compare_state(state_set_0.states[0], expectations)
+ ######################
+ state_set_4 = parse_result.chart[4]
+ expect(state_set_4.states.size).to eq(2)
+ # Expectation chart[4]:
+ # 0: A -> "a" A "c" ., 1 # scan from S(3) 1
+ # 1: A -> "a" A . "c", 0 # complete from 0 and S(1) 0
+ expectations = [
+ { origin: 1, production: prod_A1, dot: -1 },
+ { origin: 0, production: prod_A1, dot: 2 }
+ ]
+ compare_state_set(state_set_4, expectations)
- expectations = { origin: 0, production: prod_A1, dot: 0 }
- compare_state(state_set_0.states[1], expectations)
+ ######################
+ state_set_5 = parse_result.chart[5]
+ expect(state_set_5.states.size).to eq(2)
+ # Expectation chart[5]:
+ # 0: A -> "a" A "c" ., 0 # scan from S(4) 1
+ # 1: S -> A ., 0 # complete from 0 and S(0) 0
+ expectations = [
+ { origin: 0, production: prod_A1, dot: -1 },
+ { origin: 0, production: prod_S, dot: -1 }
+ ]
+ compare_state_set(state_set_5, expectations)
+ end
- expectations = { origin: 0, production: prod_A2, dot: 0 }
- compare_state(state_set_0.states[2], expectations)
+ it 'should parse an invalid simple input' do
+ # Parse an erroneous input (b is missing)
+ wrong = [
+ Token.new('a', a_),
+ Token.new('a', a_),
+ Token.new('c', c_),
+ Token.new('c', c_)
+ ]
+ parse_result = subject.parse(wrong)
+ expect(parse_result.success?).to eq(false)
- ###################### S(1) == a . a c c
- state_set_1 = parse_result.chart[1]
- expect(state_set_1.states.size).to eq(3)
- # Expectation chart[1]:
- # 0: A -> "a" . A "c", 0 # scan from S(0) 1
- # 1: A -> . "a" A "c", 1 # predict from 0
- # 2: A -> . "b", 1 # predict from 0
- expectations = { origin: 0, production: prod_A1, dot: 1 }
- compare_state(state_set_1.states[0], expectations)
+ ###################### S(0) == . a a c c
+ # Expectation chart[0]:
+ # S -> . A, 0 # start rule
+ # A -> . "a" A "c", 0
+ # A -> . "b", 0
+ expectations = [
+ { origin: 0, production: prod_S, dot: 0 },
+ { origin: 0, production: prod_A1, dot: 0 },
+ { origin: 0, production: prod_A2, dot: 0 }
+ ]
+ compare_state_set(parse_result.chart[0], expectations)
- expectations = { origin: 1, production: prod_A1, dot: 0 }
- compare_state(state_set_1.states[1], expectations)
+ ###################### S(1) == a . a c c
+ state_set_1 = parse_result.chart[1]
+ expect(state_set_1.states.size).to eq(3)
+ # Expectation chart[1]:
+ # 0: A -> "a" . A "c", 0 # scan from S(0) 1
+ # 1: A -> . "a" A "c", 1 # predict from 0
+ # 2: A -> . "b", 1 # predict from 0
+ expectations = [
+ { origin: 0, production: prod_A1, dot: 1 },
+ { origin: 1, production: prod_A1, dot: 0 },
+ { origin: 1, production: prod_A2, dot: 0 }
+ ]
+ compare_state_set(state_set_1, expectations)
- expectations = { origin: 1, production: prod_A2, dot: 0 }
- compare_state(state_set_1.states[2], expectations)
+ ###################### S(2) == a a . c c
+ state_set_2 = parse_result.chart[2]
+ expect(state_set_2.states.size).to eq(3)
+ # Expectation chart[2]:
+ # 0: A -> "a" . A "c", 1 # scan from S(0) 1
+ # 1: A -> . "a" A "c", 2 # predict from 0
+ # 2: A -> . "b", 2 # predict from 0
+ expectations = [
+ { origin: 1, production: prod_A1, dot: 1 },
+ { origin: 2, production: prod_A1, dot: 0 },
+ { origin: 2, production: prod_A2, dot: 0 }
+ ]
+ compare_state_set(state_set_2, expectations)
- ###################### S(2) == a a . c c
- state_set_2 = parse_result.chart[2]
- expect(state_set_2.states.size).to eq(3)
- # Expectation chart[2]:
- # 0: A -> "a" . A "c", 1 # scan from S(0) 1
- # 1: A -> . "a" A "c", 2 # predict from 0
- # 2: A -> . "b", 2 # predict from 0
- expectations = { origin: 1, production: prod_A1, dot: 1 }
- compare_state(state_set_2.states[0], expectations)
+ ###################### S(3) == a a c? c
+ state_set_3 = parse_result.chart[3]
+ expect(state_set_3.states).to be_empty # This is an error symptom
+ end
+ end # context
- expectations = { origin: 2, production: prod_A1, dot: 0 }
- compare_state(state_set_2.states[1], expectations)
-
- expectations = { origin: 2, production: prod_A2, dot: 0 }
- compare_state(state_set_2.states[2], expectations)
-
- ###################### S(3) == a a c? c
- state_set_3 = parse_result.chart[3]
- expect(state_set_3.states).to be_empty # This is an error symptom
- end
- end # context
-
- end # describe
-
+ end # describe
end # module
end # module
# End of file
-