TextParser.rb in taskjuggler-0.0.6

- old
+ new

@@ -159,34 +159,25 @@
     def parse(ruleName)
       @stack = []
       @@expectedTokens = []
       updateParserTables
       begin
-        result = parseRule(@rules[ruleName])
+        result = parseRuleR(@rules[ruleName])
       rescue TjException
         # error('parse_error', $!.message)
         return nil
       end
 
       result
     end
 
-    # Return true if the scanner has processed all files.
-    def checkForEnd
-      token = @scanner.nextToken
-      unless token[0] == '.'
-        error('junk_at_expected_eof',
-              "Found garbage at expected end of file: #{token[1]}")
-      end
-    end
-
     # Return the SourceFileInfo of the TextScanner at the beginning of the
     # currently processed TextParser::Rule. Or return nil if we don't have a
     # current position.
     def sourceFileInfo
       return nil if @stack.empty?
-      @stack.last.sourceFileInfo
+      @stack.last.sourceFileInfo[0]
     end
 
     def matchingRules(keyword)
       matches = []
       @rules.each do |name, rule|
@@ -194,16 +185,22 @@
         matches << [ rule, patIdx ] if patIdx
       end
       matches
     end
 
-    def error(id, text, property = nil)
-      @scanner.error(id, text, property)
+    def error(id, text, property = nil, sfi = nil)
+      if @scanner
+        @scanner.error(id, text, property, sfi)
+      else
+        message = Message.new(id, 'error', text, property, sfi)
+        @messageHandler.send(message)
+        raise TjException.new, ''
+      end
     end
 
-    def warning(id, text, property = nil)
-      @scanner.warning(id, text, property)
+    def warning(id, text, property = nil, sfi = nil)
+      @scanner.warning(id, text, property, sfi)
     end
 
   private
 
     # getTransitions recursively determines all possible target tokens
@@ -272,11 +269,12 @@
           type = tok[0]
           token = tok[1..-1]
           if type == ?$
             if @variables.index(token).nil?
               error('unsupported_token',
-                    "The token #{token} is not supported here.")
+                    "The token #{token} is not supported here.", nil,
+                    token[2])
             end
           elsif type == ?!
             if @rules[token].nil?
               raise "Fatal Error: Reference to unknown rule #{token} in " +
                     "pattern '#{pat}' of rule #{rule.name}"
@@ -287,130 +285,62 @@
     end
 
     # This function processes the input starting with the syntax description of
     # _rule_. It recursively calls this function whenever the syntax description
     # contains the reference to another rule.
-    def parseRule(rule)
-      Log.enter('parseRule', "Parsing with rule #{rule.name}")
+    # This recursive version has cleaner code and is about 8% faster than
+    # parseRuleNR.
+    def parseRuleR(rule)
+      #Log.enter('parseRuleR', "Parsing with rule #{rule.name}")
       result = rule.repeatable ? TextParserResultArray.new : nil
       # Rules can be marked 'repeatable'. This flag will be set to true after
       # the first iteration has been completed.
       repeatMode = false
       loop do
         # At the beginning of a rule we need a token from the input to determine
         # which pattern of the rule needs to be processed.
         token = getNextToken
 
-        # The scanner cannot differentiate between keywords and identifiers. So
-        # whenever an identifier is returned we have to see if we have a
-        # matching keyword first. If none is found, then look for normal
-        # identifiers.
-        if token[0] == 'ID'
-          if (patIdx = rule.matchingPatternIndex('_' + token[1])).nil?
-            patIdx = rule.matchingPatternIndex("$ID")
-          end
-        elsif token[0] == 'LITERAL'
-          patIdx = rule.matchingPatternIndex('_' + token[1])
-        elsif token[0] == false
-          patIdx = rule.matchingPatternIndex('.')
-        else
-          patIdx = rule.matchingPatternIndex('$' + token[0])
-        end
+        return result unless (pattern = findPattern(rule, token, repeatMode))
+        # The @stack will store the resulting value of each element in the
+        # pattern.
+        @stack << TextParser::StackElement.new(pattern.function)
 
-        # If no matching pattern is found for the token we have to check if the
-        # rule is optional or we are in repeat mode. If this is the case, return
-        # the token back to the scanner. Otherwise we have found a token we
-        # cannot handle at this point.
-        if patIdx.nil?
-          # Append the list of expected tokens to the @@expectedToken array.
-          # This may be used in a later rule to provide more details when an
-          # error occured.
-          rule.transitions.each do |transition|
-            keys = transition.keys
-            keys.collect! { |key| key[1..-1] }
-            @@expectedTokens += keys
-            @@expectedTokens.sort!
-          end
-
-          unless rule.optional?(@rules) || repeatMode
-            error('unexpctd_token',
-                  (token[0] != false ?
-                   "Unexpected token '#{token[1]}' of type '#{token[0]}'. " :
-                   "Unexpected end of file in #{@scanner.fileName}. ") +
-                  (@@expectedTokens.length > 1 ?
-                   "Expecting one of #{@@expectedTokens.join(', ')}" :
-                   "Expecting #{@@expectedTokens[0]}"))
-          end
-          returnToken(token)
-          Log.exit('parseRule', "Finished rule #{rule.name}")
-          return result
-        end
-
-        pattern = rule.pattern(patIdx)
-        @stack << TextParser::StackElement.new(rule, pattern.function,
-                                               @scanner.sourceFileInfo)
-
         pattern.each do |element|
           # Separate the type and token text for pattern element.
           elType = element[0]
           elToken = element[1..-1]
           if elType == ?!
-            # The element is a reference to another rule. Return the token if we
-            # still have one and continue with the referenced rule.
+            # The element is a reference to another rule. Return the token if
+            # we still have one and continue with the referenced rule.
             unless token.nil?
+              sfi = token[2]
               returnToken(token)
               token = nil
+            else
+              sfi = nil
             end
-            @stack.last.store(parseRule(@rules[elToken]),
-                              @scanner.sourceFileInfo)
+            @stack.last.store(parseRuleR(@rules[elToken]), sfi)
+            #Log << "Resuming rule #{rule.name}"
           else
             # In case the element is a keyword or variable we have to get a new
             # token if we don't have one anymore.
-            if token.nil?
-              token = getNextToken
-            end
+            token = getNextToken unless token
 
-            if elType == ?_
-              # If the element requires a keyword the token must match this
-              # keyword.
-              if elToken != token[1]
-                text = "'#{elToken}' expected but found " +
-                       "'#{token[1]}' (#{token[0]})."
-                unless @@expectedTokens.empty?
-                  text = "#{@@expectedTokens.join(', ')} or " + text
-                end
-                error('spec_keywork_expctd', text)
-              end
-              @stack.last.store(elToken, @scanner.sourceFileInfo)
-            elsif elType == ?.
-              if token != [ '.', '<END>' ]
-                error('end_expected', 'End expected but found ' +
-                      "'#{token[1]}' (#{token[0]}).")
-              end
-            else
-              # The token must match the expected variable type.
-              if token[0] != elToken
-                text = "'#{elToken}' expected but found " +
-                       "'#{token[1]}' (#{token[0]})."
-                unless @@expectedTokens.empty?
-                  text = "#{@@expectedTokens.join(', ')} or " + text
-                end
-                error('spec_token_expctd', text)
-              end
-              # If the element is a variable store the value of the token.
-              @stack.last.store(token[1], @scanner.sourceFileInfo)
-            end
+            processNormalElements(elType, elToken, token)
+
             # The token has been consumed. Reset the variable.
             token = nil
             @@expectedTokens = []
           end
         end
 
         # Once the complete pattern has been processed we call the processing
         # function for this pattern to operate on the value array. Then pop the
         # entry for this rule from the stack.
         @val = @stack.last.val
+        @sourceFileInfo = @stack.last.sourceFileInfo
         res = nil
         res = @stack.last.function.call unless @stack.last.function.nil?
         @stack.pop
 
         # If the rule is not repeatable we can store the result and break the
@@ -424,25 +354,257 @@
         # mode on.
         result << res
         repeatMode = true
       end
 
-      Log.exit('parseRule', "Finished rule #{rule.name}")
+      #Log.exit('parseRuleR', "Finished rule #{rule.name}")
       return result
     end
 
+    # This function processes the input starting with the syntax description
+    # of _rule_. It is the non-recursive counterpart of parseRuleR: references
+    # to other rules are handled with an explicit stack (recursionStack)
+    # instead of method calls, so it is not limited by the size of the system
+    # stack. So far, I'm not aware of any project that is too large for the
+    # system stack. Since the recursive version parseRuleR is about 8% faster
+    # and has cleaner code, we use that by default.
+    def parseRuleNR(rule)
+      elementIdx = 0
+      recursionResult = nil
+      # These flags are used to manage the control flow to and from the
+      # recursion point.
+      recur = resume = false
+      # The stack that holds the context for the recursion levels. Each entry
+      # is either a rule to start a new recursion or an Array of saved state.
+      recursionStack = [ rule ]
+      begin
+        # Pop the top entry from the recursion stack.
+        se = recursionStack.pop
+        if se.is_a?(Array)
+          # We have essentially finished a recursion level and need to get
+          # back to the place where we started the recursion. First, we need
+          # to restore the state again.
+          rule, pattern, elementIdx, result, repeatMode, sfi = se
+          #Log << "Recursion loop started in resume mode for rule #{rule.name}"
+          # Now jump to the recursion point without doing anything else.
+          resume = true
+        else
+          # Start a new recursion level. The rule tells us how to interpret
+          # the input text.
+          rule = se
+          #Log.enter('parseRuleNR', "Parsing with rule #{rule.name}")
+          resume = false
+        end
+
+        unless resume
+          result = rule.repeatable ? TextParserResultArray.new : nil
+          # Rules can be marked 'repeatable'. This flag will be set to true
+          # after the first iteration has been completed.
+          repeatMode = false
+        end
+
+        loop do
+          unless resume
+            # At the beginning of a rule we need a token from the input to
+            # determine which pattern of the rule needs to be processed.
+            token = getNextToken
+
+            break unless (pattern = findPattern(rule, token, repeatMode))
+            # The @stack will store the resulting value of each element in the
+            # pattern.
+            @stack << TextParser::StackElement.new(pattern.function)
+
+            # Once we've found the right pattern, we need to process each
+            # element.
+            elementIdx = 0
+          end
+
+          elementCount = pattern.length
+          while elementIdx < elementCount
+            element = pattern[elementIdx]
+            # Separate the type and token text for pattern element.
+            elType = element[0]
+            elToken = element[1..-1]
+            if elType == ?!
+              unless resume
+                # The element is a reference to another rule. Return the token
+                # if we still have one and continue with the referenced rule.
+                if token
+                  sfi = token[2]
+                  returnToken(token)
+                  token = nil
+                else
+                  sfi = nil
+                end
+                # This is where the recursion would happen. Instead, we push
+                # the state variables and then the next rule onto the
+                # recursion stack.
+                recursionStack.push([ rule, pattern, elementIdx, result,
+                                      repeatMode, sfi ])
+                recursionStack.push(@rules[elToken])
+                # Now terminate all but the outermost loop without doing
+                # anything else.
+                recur = true
+                break
+              else
+                # We're back right after where the recursion started. Store
+                # the result and turn resume mode off again.
+                @stack.last.store(recursionResult, sfi)
+                resume = false
+              end
+            else
+              # In case the element is a keyword or variable we have to get a
+              # new token if we don't have one anymore.
+              token = getNextToken unless token
+
+              processNormalElements(elType, elToken, token)
+
+              # The token has been consumed. Reset the variable.
+              token = nil
+              @@expectedTokens = []
+            end
+            elementIdx += 1
+          end # of pattern while loop
+
+          # Skip the rest of the loop in recur mode.
+          break if recur
+
+          elementIdx = 0
+
+          # Once the complete pattern has been processed we call the
+          # processing function for this pattern to operate on the value
+          # array. Then pop the entry for this rule from the stack. The
+          # called function will use @val and @sourceFileInfo to retrieve
+          # data from the parser.
+          @val = @stack.last.val
+          @sourceFileInfo = @stack.last.sourceFileInfo
+          res = @stack.last.function ? @stack.last.function.call : nil
+          @stack.pop
+
+          # If the rule is not repeatable we can store the result and break
+          # the outer loop to exit the function.
+          unless rule.repeatable
+            result = res
+            break
+          end
+
+          # Otherwise we append the result to the result array and turn repeat
+          # mode on.
+          result << res
+          # We have completed the first iteration. Set the repeat mode flag to
+          # indicate that further iterations are already re-runs.
+          repeatMode = true
+        end # of rule processing loop
+
+        if recur
+          recur = false
+        else
+          #Log.exit('parseRuleNR', "Finished rule #{rule.name}")
+          recursionResult = result
+        end
+      end while !recursionStack.empty?
+
+      return result
+    end
+
     def getNextToken
       begin
         token = nextToken
-        Log << "Token: [#{token[0]}][#{token[1]}]"
+        #Log << "Token: [#{token[0]}][#{token[1]}]"
       rescue TjException
         error('parse_rule', $!.message)
       end
       if @badVariables.include?(token[0])
         error('unsupported_token',
-              "The token #{token[1]} is not supported in this context.")
+              "The token #{token[1]} is not supported in this context.",
+              nil, token[2])
       end
       token
+    end
+
+    def findPattern(rule, token, repeatMode)
+      # Return the pattern of _rule_ that matches _token_, or nil if there is
+      # none. The scanner cannot differentiate between keywords and
+      # identifiers, so for an identifier we look for a matching keyword
+      # first. If none is found, then look for normal identifiers.
+      if token[0] == 'ID'
+        if (patIdx = rule.matchingPatternIndex('_' + token[1])).nil?
+          patIdx = rule.matchingPatternIndex("$ID")
+        end
+      elsif token[0] == 'LITERAL'
+        patIdx = rule.matchingPatternIndex('_' + token[1])
+      elsif token[0] == false
+        patIdx = rule.matchingPatternIndex('.')
+      else
+        patIdx = rule.matchingPatternIndex('$' + token[0])
+      end
+
+      # If no matching pattern is found for the token we have to check if the
+      # rule is optional or we are in repeat mode. If this is the case, return
+      # the token back to the scanner. Otherwise, we have found a token we
+      # cannot handle at this point.
+      if patIdx.nil?
+        # Append the list of expected tokens to the @@expectedTokens array.
+        # This may be used in a later rule to provide more details when an
+        # error occurred.
+        rule.transitions.each do |transition|
+          keys = transition.keys
+          keys.collect! { |key| key[1..-1] }
+          @@expectedTokens += keys
+          @@expectedTokens.sort!
+        end
+
+        unless rule.optional?(@rules) || repeatMode
+          error('unexpctd_token',
+                (token[0] != false ?
+                 "Unexpected token '#{token[1]}' of type " +
+                 "'#{token[0]}'. " :
+                 "Unexpected end of file in #{@scanner.fileName}. ") +
+                (@@expectedTokens.length > 1 ?
+                 "Expecting one of #{@@expectedTokens.join(', ')}" :
+                 "Expecting #{@@expectedTokens[0]}"))
+        end
+        returnToken(token)
+        return nil
+      end
+
+      rule.pattern(patIdx)
+    end
+
+    # Handle the pattern elements that don't trigger a recursion.
+    def processNormalElements(elType, elToken, token)
+      if elType == ?_
+        # The element requires a keyword. The text of the token must match
+        # this keyword.
+        if elToken != token[1]
+          text = "'#{elToken}' expected but found " +
+                 "'#{token[1]}' (#{token[0]})."
+          unless @@expectedTokens.empty?
+            text = "#{@@expectedTokens.join(', ')} or " + text
+          end
+          error('spec_keywork_expctd', text)
+        end
+        @stack.last.store(elToken, token[2])
+      elsif elType == ?.
+        if token[0..1] != [ '.', '<END>' ]
+          error('end_expected',
+                "Found garbage at expected end of file: #{token[1]}\n" +
+                "If you see this in the middle of your file, you probably " +
+                "have closed your context too early.")
+        end
+      else
+        # The type of the token must match the expected variable type.
+        if token[0] != elToken
+          text = "'#{elToken}' expected but found " +
+                 "'#{token[1]}' (#{token[0]})."
+          unless @@expectedTokens.empty?
+            text = "#{@@expectedTokens.join(', ')} or " + text
+          end
+          error('spec_token_expctd', text)
+        end
+        # The element is a variable. Store the value of the token.
+        @stack.last.store(token[1], token[2])
+      end
     end
 
   end
 
 end