spec/rley/parser/earley_parser_spec.rb in rley-0.0.04 vs spec/rley/parser/earley_parser_spec.rb in rley-0.0.05

- old
+ new

@@ -7,265 +7,301 @@
  # Load the class under test
  require_relative '../../../lib/rley/parser/earley_parser'
  module Rley # Open this namespace to avoid module qualifier prefixes
  module Parser # Open this namespace to avoid module qualifier prefixes
- describe EarleyParser do
+ describe EarleyParser do
  =begin
-   let(:kw_true) { Syntax::VerbatimSymbol('true') }
-   let(:kw_false) { Syntax::VerbatimSymbol('false') }
-   let(:kw_null) { Syntax::VerbatimSymbol('null') }
-   let(:number) do
-     number_pattern = /[-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/
-     Syntax::Literal('number', number_pattern)
-   end
-   let(:string) do
-     string_pattern = /"([^\\"]|\\.)*"/
-     Syntax::Literal('string', string_pattern)
-   end
-   let(:lbracket) { Syntax::VerbatimSymbol('[') }
-   let(:rbracket) { Syntax::VerbatimSymbol(']') }
-   let(:comma) { Syntax::VerbatimSymbol(',') }
-   let(:array) { Syntax::NonTerminal('Array') }
-   let(:object) { Syntax::NonTerminal('Object') }
+   let(:kw_true) { Syntax::VerbatimSymbol.new('true') }
+   let(:kw_false) { Syntax::VerbatimSymbol.new('false') }
+   let(:kw_null) { Syntax::VerbatimSymbol.new('null') }
+   let(:number) do
+     number_pattern = /[-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/
+     Syntax::Literal.new('number', number_pattern)
+   end
+   let(:string) do
+     string_pattern = /"([^\\"]|\\.)*"/
+     Syntax::Literal('string', string_pattern)
+   end
+   let(:lbracket) { Syntax::VerbatimSymbol.new('[') }
+   let(:rbracket) { Syntax::VerbatimSymbol.new(']') }
+   let(:comma) { Syntax::VerbatimSymbol.new(',') }
+   let(:array) { Syntax::NonTerminal.new('Array') }
+   let(:object) { Syntax::NonTerminal.new('Object') }
-   let(:array_prod) do
-     Production.new(array, )
-   end
+   let(:array_prod) do
+     Production.new(array, )
+   end
  =end
-   # Grammar 1: A very simple language
-   # S ::= A.
-   # A ::= "a" A "c".
-   # A ::= "b".
-   let(:nt_S) { Syntax::NonTerminal.new('S') }
-   let(:nt_A) { Syntax::NonTerminal.new('A') }
-   let(:a_) { Syntax::VerbatimSymbol.new('a') }
-   let(:b_) { Syntax::VerbatimSymbol.new('b') }
-   let(:c_) { Syntax::VerbatimSymbol.new('c') }
-   let(:prod_S) { Syntax::Production.new(nt_S, [nt_A]) }
-   let(:prod_A1) { Syntax::Production.new(nt_A, [a_, nt_A, c_]) }
-   let(:prod_A2) { Syntax::Production.new(nt_A, [b_]) }
-   let(:grammar_abc) { Syntax::Grammar.new([prod_S, prod_A1, prod_A2]) }
+   # Grammar 1: A very simple language
+   # S ::= A.
+   # A ::= "a" A "c".
+   # A ::= "b".
+   # Let's create the grammar piece by piece
+   let(:nt_S) { Syntax::NonTerminal.new('S') }
+   let(:nt_A) { Syntax::NonTerminal.new('A') }
+   let(:a_) { Syntax::VerbatimSymbol.new('a') }
+   let(:b_) { Syntax::VerbatimSymbol.new('b') }
+   let(:c_) { Syntax::VerbatimSymbol.new('c') }
+   let(:prod_S) { Syntax::Production.new(nt_S, [nt_A]) }
+   let(:prod_A1) { Syntax::Production.new(nt_A, [a_, nt_A, c_]) }
+   let(:prod_A2) { Syntax::Production.new(nt_A, [b_]) }
+   let(:grammar_abc) { Syntax::Grammar.new([prod_S, prod_A1, prod_A2]) }
-   # Helper method that mimicks the output of a tokenizer
-   # for the language specified by gramma_abc
-   def grm1_tokens()
-     tokens = [
-       Token.new('a', a_),
-       Token.new('a', a_),
-       Token.new('b', b_),
-       Token.new('c', c_),
-       Token.new('c', c_)
-     ]
+   # Helper method that mimicks the output of a tokenizer
+   # for the language specified by grammar_abc
+   def grm1_tokens()
+     tokens = [
+       Token.new('a', a_),
+       Token.new('a', a_),
+       Token.new('b', b_),
+       Token.new('c', c_),
+       Token.new('c', c_)
+     ]
-     return tokens
-   end
-   # Default instantiation rule
-   subject { EarleyParser.new(grammar_abc) }
-   context 'Initialization:' do
-     it 'should be created with a grammar' do
-       expect { EarleyParser.new(grammar_abc) }.not_to raise_error
+     return tokens
    end
-     it 'should know its grammar' do
-       expect(subject.grammar).to eq(grammar_abc)
-     end
-     it 'should know its dotted items' do
-       expect(subject.dotted_items.size).to eq(8)
+   # Grammar 2: A simple arithmetic expression language
+   # E ::= S.
+   # S ::= S "+" M.
+   # S ::= M.
+   # M ::= M "*" M.
+   # M ::= T.
+   # T ::= an integer number token.
+   # Let's create the grammar piece by piece
+   let(:nt_E) { Syntax::NonTerminal.new('E') }
+   let(:nt_M) { Syntax::NonTerminal.new('M') }
+   let(:nt_T) { Syntax::NonTerminal.new('T') }
+   let(:plus) { Syntax::VerbatimSymbol.new('+') }
+   let(:star) { Syntax::VerbatimSymbol.new('*') }
+   let(:integer) do
+     integer_pattern = /[-+]?[0-9]+/ # Decimal notation
+     Syntax::Literal.new('integer', integer_pattern)
    end
-     it 'should have its start mapping initialized' do
-       expect(subject.start_mapping.size).to eq(2)
-       start_items_S = subject.start_mapping[nt_S]
-       expect(start_items_S.size).to eq(1)
-       expect(start_items_S[0].production).to eq(prod_S)
-       start_items_A = subject.start_mapping[nt_A]
-       expect(start_items_A.size).to eq(2)
-       # Assuming that dotted_items are created in same order
-       # than production in grammar.
-       expect(start_items_A[0].production).to eq(prod_A1)
-       expect(start_items_A[1].production).to eq(prod_A2)
+   let(:prod_E) { Syntax::Production.new(nt_E, [nt_S]) }
+   let(:prod_S1) { Syntax::Production.new(nt_S, [nt_S, plus, nt_M]) }
+   let(:prod_S2) { Syntax::Production.new(nt_S, [nt_M]) }
+   let(:prod_M1) { Syntax::Production.new(nt_M, [nt_M, star, nt_M]) }
+   let(:prod_M2) { Syntax::Production.new(nt_M, [nt_T]) }
+   let(:prod_T) { Syntax::Production.new(nt_T, [integer]) }
+   let(:grammar_expr) do
+     all_prods = [prod_E, prod_S1, prod_S2, prod_M1, prod_M2, prod_T]
+     Syntax::Grammar.new(all_prods)
    end
-     it 'should have its next mapping initialized' do
-       expect(subject.next_mapping.size).to eq(5)
-     end
-   end # context
+   # Helper method that mimicks the output of a tokenizer
+   # for the language specified by grammar_expr
+   def grm2_tokens()
+     tokens = [
+       Token.new('2', integer),
+       Token.new('+', plus),
+       Token.new('3', integer),
+       Token.new('*', star),
+       Token.new('4', integer)
+     ]
-   context 'Parsing: ' do
-     # Helper method. Compare the data from the parse state
-     # with values from expectation hash.
-     def compare_state(aState, expectations)
-       expect(aState.origin).to eq(expectations[:origin])
-       dotted_item = aState.dotted_rule
-       expect(dotted_item.production).to eq(expectations[:production])
-       expect(dotted_item.position).to eq(expectations[:dot])
+     return tokens
    end
-     it 'should parse a valid simple input' do
-       parse_result = subject.parse(grm1_tokens)
-       expect(parse_result.success?).to eq(true)
-       ######################
-       state_set_0 = parse_result.chart[0]
-       # Expectation chart[0]:
-       # S -> . A, 0 # start rule
-       # A -> . "a" A "c", 0 # predict from 0
-       # A -> . "b", 0 # predict from 0
-       expectations = { origin: 0, production: prod_S, dot: 0 }
-       compare_state(state_set_0.states[0], expectations)
+   # Default instantiation rule
+   subject { EarleyParser.new(grammar_abc) }
-       expectations = { origin: 0, production: prod_A1, dot: 0 }
-       compare_state(state_set_0.states[1], expectations)
+   context 'Initialization:' do
+     it 'should be created with a grammar' do
+       expect { EarleyParser.new(grammar_abc) }.not_to raise_error
+       expect { EarleyParser.new(grammar_expr) }.not_to raise_error
+     end
-       expectations = { origin: 0, production: prod_A2, dot: 0 }
-       compare_state(state_set_0.states[2], expectations)
+     it 'should know its grammar' do
+       expect(subject.grammar).to eq(grammar_abc)
+     end
-       ######################
-       state_set_1 = parse_result.chart[1]
-       expect(state_set_1.states.size).to eq(3)
-       # Expectation chart[1]:
-       # 0: A -> "a" . A "c", 0 # scan from S(0) 1
-       # 1: A -> . "a" A "c", 1 # predict from 0
-       # 2: A -> . "b", 1 # predict from 0
-       expectations = { origin: 0, production: prod_A1, dot: 1 }
-       compare_state(state_set_1.states[0], expectations)
+     it 'should know its dotted items' do
+       expect(subject.dotted_items.size).to eq(8)
+     end
-       expectations = { origin: 1, production: prod_A1, dot: 0 }
-       compare_state(state_set_1.states[1], expectations)
+     it 'should have its start mapping initialized' do
+       expect(subject.start_mapping.size).to eq(2)
-       expectations = { origin: 1, production: prod_A2, dot: 0 }
-       compare_state(state_set_1.states[2], expectations)
+       start_items_S = subject.start_mapping[nt_S]
+       expect(start_items_S.size).to eq(1)
+       expect(start_items_S[0].production).to eq(prod_S)
-       ######################
-       state_set_2 = parse_result.chart[2]
-       expect(state_set_2.states.size).to eq(3)
-       # Expectation chart[2]:
-       # 0: A -> "a" . A "c", 1 # scan from S(0) 1
-       # 1: A -> . "a" A "c", 2 # predict from 0
-       # 2: A -> . "b", 2 # predict from 0
-       expectations = { origin: 1, production: prod_A1, dot: 1 }
-       compare_state(state_set_2.states[0], expectations)
+       start_items_A = subject.start_mapping[nt_A]
+       expect(start_items_A.size).to eq(2)
-       expectations = { origin: 2, production: prod_A1, dot: 0 }
-       compare_state(state_set_2.states[1], expectations)
+       # Assuming that dotted_items are created in same order
+       # than production in grammar.
+       expect(start_items_A[0].production).to eq(prod_A1)
+       expect(start_items_A[1].production).to eq(prod_A2)
+     end
-       expectations = { origin: 2, production: prod_A2, dot: 0 }
-       compare_state(state_set_2.states[2], expectations)
+     it 'should have its next mapping initialized' do
+       expect(subject.next_mapping.size).to eq(5)
+     end
+   end # context
-       ######################
-       state_set_3 = parse_result.chart[3]
-       expect(state_set_3.states.size).to eq(2)
-       # Expectation chart[3]:
-       # 0: A -> "b" ., 2 # scan from S(2) 2
-       # 1: A -> "a" A . "c", 1 # complete from 0 and S(2) 0
-       expectations = { origin: 2, production: prod_A2, dot: -1 }
-       compare_state(state_set_3.states[0], expectations)
+   context 'Parsing: ' do
+     # Helper method. Compare the data from the parse state
+     # with values from expectation hash.
+     def compare_state(aState, expectations)
+       expect(aState.origin).to eq(expectations[:origin])
+       dotted_item = aState.dotted_rule
+       expect(dotted_item.production).to eq(expectations[:production])
+       expect(dotted_item.position).to eq(expectations[:dot])
+     end
-       expectations = { origin: 1, production: prod_A1, dot: 2 }
-       compare_state(state_set_3.states[1], expectations)
+     # Helper method. Compare the data from all the parse states
+     # of a given StateSet with an array of expectation hashes.
+     def compare_state_set(aStateSet, expectations)
+       (0...expectations.size).each do |i|
+         compare_state(aStateSet.states[i], expectations[i])
+       end
+     end
-       ######################
-       state_set_4 = parse_result.chart[4]
-       expect(state_set_4.states.size).to eq(2)
-       # Expectation chart[4]:
-       # 0: A -> "a" A "c" ., 1 # scan from S(3) 1
-       # 1: A -> "a" A . "c", 0 # complete from 0 and S(1) 0
-       expectations = { origin: 1, production: prod_A1, dot: -1 }
-       compare_state(state_set_4.states[0], expectations)
+     it 'should parse a valid simple input' do
+       parse_result = subject.parse(grm1_tokens)
+       expect(parse_result.success?).to eq(true)
-       expectations = { origin: 0, production: prod_A1, dot: 2 }
-       compare_state(state_set_4.states[1], expectations)
+       ######################
+       # Expectation chart[0]:
+       # S -> . A, 0 # start rule
+       # A -> . "a" A "c", 0 # predict from 0
+       # A -> . "b", 0 # predict from 0
+       expectations = [
+         { origin: 0, production: prod_S, dot: 0 },
+         { origin: 0, production: prod_A1, dot: 0 },
+         { origin: 0, production: prod_A2, dot: 0 }
+       ]
+       compare_state_set(parse_result.chart[0], expectations)
-       ######################
-       state_set_5 = parse_result.chart[5]
-       expect(state_set_5.states.size).to eq(2)
-       # Expectation chart[5]:
-       # 0: A -> "a" A "c" ., 0 # scan from S(4) 1
-       # 1: S -> A ., 0 # complete from 0 and S(0) 0
-       expectations = { origin: 0, production: prod_A1, dot: -1 }
-       compare_state(state_set_5.states[0], expectations)
+       ######################
+       state_set_1 = parse_result.chart[1]
+       expect(state_set_1.states.size).to eq(3)
+       # Expectation chart[1]:
+       # 0: A -> "a" . A "c", 0 # scan from S(0) 1
+       # 1: A -> . "a" A "c", 1 # predict from 0
+       # 2: A -> . "b", 1 # predict from 0
+       expectations = [
+         { origin: 0, production: prod_A1, dot: 1 },
+         { origin: 1, production: prod_A1, dot: 0 },
+         { origin: 1, production: prod_A2, dot: 0 }
+       ]
+       compare_state_set(state_set_1, expectations)
-       expectations = { origin: 0, production: prod_S, dot: -1 }
-       compare_state(state_set_5.states[1], expectations)
-     end
+       ######################
+       state_set_2 = parse_result.chart[2]
+       expect(state_set_2.states.size).to eq(3)
+       # Expectation chart[2]:
+       # 0: A -> "a" . A "c", 1 # scan from S(0) 1
+       # 1: A -> . "a" A "c", 2 # predict from 0
+       # 2: A -> . "b", 2 # predict from 0
+       expectations = [
+         { origin: 1, production: prod_A1, dot: 1 },
+         { origin: 2, production: prod_A1, dot: 0 },
+         { origin: 2, production: prod_A2, dot: 0 }
+       ]
+       compare_state_set(state_set_2, expectations)
-     it 'should parse an invalid simple input' do
-       # Parse an erroneous input (b is missing)
-       wrong = [
-         Token.new('a', a_),
-         Token.new('a', a_),
-         Token.new('c', c_),
-         Token.new('c', c_)
-       ]
-       parse_result = subject.parse(wrong)
-       expect(parse_result.success?).to eq(false)
+       ######################
+       state_set_3 = parse_result.chart[3]
+       expect(state_set_3.states.size).to eq(2)
+       # Expectation chart[3]:
+       # 0: A -> "b" ., 2 # scan from S(2) 2
+       # 1: A -> "a" A . "c", 1 # complete from 0 and S(2) 0
+       expectations = [
+         { origin: 2, production: prod_A2, dot: -1 },
+         { origin: 1, production: prod_A1, dot: 2 }
+       ]
+       compare_state_set(state_set_3, expectations)
-       ###################### S(0) == . a a c c
-       state_set_0 = parse_result.chart[0]
-       # Expectation chart[0]:
-       # S -> . A, 0 # start rule
-       # A -> . "a" A "c", 0
-       # A -> . "b", 0
-       expectations = { origin: 0, production: prod_S, dot: 0 }
-       compare_state(state_set_0.states[0], expectations)
+       ######################
+       state_set_4 = parse_result.chart[4]
+       expect(state_set_4.states.size).to eq(2)
+       # Expectation chart[4]:
+       # 0: A -> "a" A "c" ., 1 # scan from S(3) 1
+       # 1: A -> "a" A . "c", 0 # complete from 0 and S(1) 0
+       expectations = [
+         { origin: 1, production: prod_A1, dot: -1 },
+         { origin: 0, production: prod_A1, dot: 2 }
+       ]
+       compare_state_set(state_set_4, expectations)
-       expectations = { origin: 0, production: prod_A1, dot: 0 }
-       compare_state(state_set_0.states[1], expectations)
+       ######################
+       state_set_5 = parse_result.chart[5]
+       expect(state_set_5.states.size).to eq(2)
+       # Expectation chart[5]:
+       # 0: A -> "a" A "c" ., 0 # scan from S(4) 1
+       # 1: S -> A ., 0 # complete from 0 and S(0) 0
+       expectations = [
+         { origin: 0, production: prod_A1, dot: -1 },
+         { origin: 0, production: prod_S, dot: -1 }
+       ]
+       compare_state_set(state_set_5, expectations)
+     end
-       expectations = { origin: 0, production: prod_A2, dot: 0 }
-       compare_state(state_set_0.states[2], expectations)
+     it 'should parse an invalid simple input' do
+       # Parse an erroneous input (b is missing)
+       wrong = [
+         Token.new('a', a_),
+         Token.new('a', a_),
+         Token.new('c', c_),
+         Token.new('c', c_)
+       ]
+       parse_result = subject.parse(wrong)
+       expect(parse_result.success?).to eq(false)
-       ###################### S(1) == a . a c c
-       state_set_1 = parse_result.chart[1]
-       expect(state_set_1.states.size).to eq(3)
-       # Expectation chart[1]:
-       # 0: A -> "a" . A "c", 0 # scan from S(0) 1
-       # 1: A -> . "a" A "c", 1 # predict from 0
-       # 2: A -> . "b", 1 # predict from 0
-       expectations = { origin: 0, production: prod_A1, dot: 1 }
-       compare_state(state_set_1.states[0], expectations)
+       ###################### S(0) == . a a c c
+       # Expectation chart[0]:
+       # S -> . A, 0 # start rule
+       # A -> . "a" A "c", 0
+       # A -> . "b", 0
+       expectations = [
+         { origin: 0, production: prod_S, dot: 0 },
+         { origin: 0, production: prod_A1, dot: 0 },
+         { origin: 0, production: prod_A2, dot: 0 }
+       ]
+       compare_state_set(parse_result.chart[0], expectations)
-       expectations = { origin: 1, production: prod_A1, dot: 0 }
-       compare_state(state_set_1.states[1], expectations)
+       ###################### S(1) == a . a c c
+       state_set_1 = parse_result.chart[1]
+       expect(state_set_1.states.size).to eq(3)
+       # Expectation chart[1]:
+       # 0: A -> "a" . A "c", 0 # scan from S(0) 1
+       # 1: A -> . "a" A "c", 1 # predict from 0
+       # 2: A -> . "b", 1 # predict from 0
+       expectations = [
+         { origin: 0, production: prod_A1, dot: 1 },
+         { origin: 1, production: prod_A1, dot: 0 },
+         { origin: 1, production: prod_A2, dot: 0 }
+       ]
+       compare_state_set(state_set_1, expectations)
-       expectations = { origin: 1, production: prod_A2, dot: 0 }
-       compare_state(state_set_1.states[2], expectations)
+       ###################### S(2) == a a . c c
+       state_set_2 = parse_result.chart[2]
+       expect(state_set_2.states.size).to eq(3)
+       # Expectation chart[2]:
+       # 0: A -> "a" . A "c", 1 # scan from S(0) 1
+       # 1: A -> . "a" A "c", 2 # predict from 0
+       # 2: A -> . "b", 2 # predict from 0
+       expectations = [
+         { origin: 1, production: prod_A1, dot: 1 },
+         { origin: 2, production: prod_A1, dot: 0 },
+         { origin: 2, production: prod_A2, dot: 0 }
+       ]
+       compare_state_set(state_set_2, expectations)
-       ###################### S(2) == a a . c c
-       state_set_2 = parse_result.chart[2]
-       expect(state_set_2.states.size).to eq(3)
-       # Expectation chart[2]:
-       # 0: A -> "a" . A "c", 1 # scan from S(0) 1
-       # 1: A -> . "a" A "c", 2 # predict from 0
-       # 2: A -> . "b", 2 # predict from 0
-       expectations = { origin: 1, production: prod_A1, dot: 1 }
-       compare_state(state_set_2.states[0], expectations)
+       ###################### S(3) == a a c? c
+       state_set_3 = parse_result.chart[3]
+       expect(state_set_3.states).to be_empty # This is an error symptom
+     end
+   end # context
-       expectations = { origin: 2, production: prod_A1, dot: 0 }
-       compare_state(state_set_2.states[1], expectations)
-       expectations = { origin: 2, production: prod_A2, dot: 0 }
-       compare_state(state_set_2.states[2], expectations)
-       ###################### S(3) == a a c? c
-       state_set_3 = parse_result.chart[3]
-       expect(state_set_3.states).to be_empty # This is an error symptom
-     end
-   end # context
- end # describe
+ end # describe
  end # module
  end # module
  # End of file