lib/TextParser.rb in taskjuggler-0.0.5 vs lib/TextParser.rb in taskjuggler-0.0.6

- old
+ new

@@ -159,34 +159,25 @@ def parse(ruleName) @stack = [] @@expectedTokens = [] updateParserTables begin - result = parseRule(@rules[ruleName]) + result = parseRuleR(@rules[ruleName]) rescue TjException # error('parse_error', $!.message) return nil end result end - # Return true if the scanner has processed all files. - def checkForEnd - token = @scanner.nextToken - unless token[0] == '.' - error('junk_at_expected_eof', - "Found garbage at expected end of file: #{token[1]}") - end - end - # Return the SourceFileInfo of the TextScanner at the beginning of the # currently processed TextParser::Rule. Or return nil if we don't have a # current position. def sourceFileInfo return nil if @stack.empty? - @stack.last.sourceFileInfo + @stack.last.sourceFileInfo[0] end def matchingRules(keyword) matches = [] @rules.each do |name, rule| @@ -194,16 +185,22 @@ matches << [ rule, patIdx ] if patIdx end matches end - def error(id, text, property = nil) - @scanner.error(id, text, property) + def error(id, text, property = nil, sfi = nil) + if @scanner + @scanner.error(id, text, property, sfi) + else + message = Message.new(id, 'error', text, property, sfi) + @messageHandler.send(message) + raise TjException.new, '' + end end - def warning(id, text, property = nil) - @scanner.warning(id, text, property) + def warning(id, text, property = nil, sfi = nil) + @scanner.warning(id, text, property, sfi) end private # getTransitions recursively determines all possible target tokens @@ -272,11 +269,12 @@ type = tok[0] token = tok[1..-1] if type == ?$ if @variables.index(token).nil? error('unsupported_token', - "The token #{token} is not supported here.") + "The token #{token} is not supported here.", nil, + token[2]) end elsif type == ?! if @rules[token].nil? raise "Fatal Error: Reference to unknown rule #{token} in " + "pattern '#{pat}' of rule #{rule.name}" @@ -287,130 +285,62 @@ end # This function processes the input starting with the syntax description of # _rule_. It recursively calls this function whenever the syntax description # contains the reference to another rule. - def parseRule(rule) - Log.enter('parseRule', "Parsing with rule #{rule.name}") + # This recursive version has cleaner code and is about 8% faster than + # parseRuleNR. + def parseRuleR(rule) + #Log.enter('parseRuleR', "Parsing with rule #{rule.name}") result = rule.repeatable ? TextParserResultArray.new : nil # Rules can be marked 'repeatable'. This flag will be set to true after # the first iteration has been completed. repeatMode = false loop do # At the beginning of a rule we need a token from the input to determine # which pattern of the rule needs to be processed. token = getNextToken - # The scanner cannot differentiate between keywords and identifiers. So - # whenever an identifier is returned we have to see if we have a - # matching keyword first. If none is found, then look for normal - # identifiers. - if token[0] == 'ID' - if (patIdx = rule.matchingPatternIndex('_' + token[1])).nil? - patIdx = rule.matchingPatternIndex("$ID") - end - elsif token[0] == 'LITERAL' - patIdx = rule.matchingPatternIndex('_' + token[1]) - elsif token[0] == false - patIdx = rule.matchingPatternIndex('.') - else - patIdx = rule.matchingPatternIndex('$' + token[0]) - end + return result unless (pattern = findPattern(rule, token, repeatMode)) + # The @stack will store the resulting value of each element in the + # pattern. + @stack << TextParser::StackElement.new(pattern.function) - # If no matching pattern is found for the token we have to check if the - # rule is optional or we are in repeat mode. If this is the case, return - # the token back to the scanner. Otherwise we have found a token we - # cannot handle at this point. - if patIdx.nil? - # Append the list of expected tokens to the @@expectedToken array. - # This may be used in a later rule to provide more details when an - # error occured. - rule.transitions.each do |transition| - keys = transition.keys - keys.collect! { |key| key[1..-1] } - @@expectedTokens += keys - @@expectedTokens.sort! - end - - unless rule.optional?(@rules) || repeatMode - error('unexpctd_token', - (token[0] != false ? - "Unexpected token '#{token[1]}' of type '#{token[0]}'. " : - "Unexpected end of file in #{@scanner.fileName}. ") + - (@@expectedTokens.length > 1 ? - "Expecting one of #{@@expectedTokens.join(', ')}" : - "Expecting #{@@expectedTokens[0]}")) - end - returnToken(token) - Log.exit('parseRule', "Finished rule #{rule.name}") - return result - end - - pattern = rule.pattern(patIdx) - @stack << TextParser::StackElement.new(rule, pattern.function, - @scanner.sourceFileInfo) - pattern.each do |element| # Separate the type and token text for pattern element. elType = element[0] elToken = element[1..-1] if elType == ?! - # The element is a reference to another rule. Return the token if we - # still have one and continue with the referenced rule. + # The element is a reference to another rule. Return the token if + # we still have one and continue with the referenced rule. unless token.nil? + sfi = token[2] returnToken(token) token = nil + else + sfi = nil end - @stack.last.store(parseRule(@rules[elToken]), - @scanner.sourceFileInfo) + @stack.last.store(parseRuleR(@rules[elToken]), sfi) + #Log << "Resuming rule #{rule.name}" else # In case the element is a keyword or variable we have to get a new # token if we don't have one anymore. - if token.nil? - token = getNextToken - end + token = getNextToken unless token - if elType == ?_ - # If the element requires a keyword the token must match this - # keyword. - if elToken != token[1] - text = "'#{elToken}' expected but found " + - "'#{token[1]}' (#{token[0]})." - unless @@expectedTokens.empty? - text = "#{@@expectedTokens.join(', ')} or " + text - end - error('spec_keywork_expctd', text) - end - @stack.last.store(elToken, @scanner.sourceFileInfo) - elsif elType == ?. - if token != [ '.', '<END>' ] - error('end_expected', 'End expected but found ' + - "'#{token[1]}' (#{token[0]}).") - end - else - # The token must match the expected variable type. - if token[0] != elToken - text = "'#{elToken}' expected but found " + - "'#{token[1]}' (#{token[0]})." - unless @@expectedTokens.empty? - text = "#{@@expectedTokens.join(', ')} or " + text - end - error('spec_token_expctd', text) - end - # If the element is a variable store the value of the token. - @stack.last.store(token[1], @scanner.sourceFileInfo) - end + processNormalElements(elType, elToken, token) + # The token has been consumed. Reset the variable. token = nil @@expectedTokens = [] end end # Once the complete pattern has been processed we call the processing # function for this pattern to operate on the value array. Then pop the # entry for this rule from the stack. @val = @stack.last.val + @sourceFileInfo = @stack.last.sourceFileInfo res = nil res = @stack.last.function.call unless @stack.last.function.nil? @stack.pop # If the rule is not repeatable we can store the result and break the @@ -424,25 +354,257 @@ # mode on. result << res repeatMode = true end - Log.exit('parseRule', "Finished rule #{rule.name}") + #Log.exit('parseRuleR', "Finished rule #{rule.name}") return result end + # This function processes the input starting with the syntax description + # of _rule_. It's implemented as an unrolled recursion. It recursively + # iterates over the rule tree as controlled by the input file. + # This version is not limited by the size of the system stack. So far, I'm + # not aware of any project that is too large for the system stack. Since + # the recursive version parseRuleR is about 8% faster and has cleaner + # code, we use that by default. + def parseRuleNR(rule) + elementIdx = 0 + recursionResult = nil + # These flags are used to managed the control flow to and from the + # recursion point. + recur = resume = false + # The stack that holds the context for the recursion levels. It's either + # just a rule to start a new recursion or an Array of state variables. + recursionStack = [ rule ] + begin + # Pop the top entry from the recursion stack. + se = recursionStack.pop + if se.is_a?(Array) + # We have essentially finished a recursion level and need to get + # back to the place where we started the recursion. First, we need + # to restore the state again. + rule, pattern, elementIdx, result, repeatMode, sfi = se + #Log << "Recursion loop started in resume mode for rule #{rule.name}" + # Now jump to the recursion point without doing anything else. + resume = true + else + # Start a new recursion level. The rule tells us how to interpret + # the input text. + rule = se + #Log.enter('parseRuleNR', "Parsing with rule #{rule.name}") + resume = false + end + + unless resume + result = rule.repeatable ? TextParserResultArray.new : nil + # Rules can be marked 'repeatable'. This flag will be set to true + # after the first iteration has been completed. + repeatMode = false + end + + loop do + unless resume + # At the beginning of a rule we need a token from the input to + # determine which pattern of the rule needs to be processed. + token = getNextToken + + break unless (pattern = findPattern(rule, token, repeatMode)) + # The @stack will store the resulting value of each element in the + # pattern. + @stack << TextParser::StackElement.new(pattern.function) + + # Once we've found the right pattern, we need to process each + # element. + elementIdx = 0 + end + + elementCount = pattern.length + while elementIdx < elementCount + element = pattern[elementIdx] + # Separate the type and token text for pattern element. + elType = element[0] + elToken = element[1..-1] + if elType == ?! + unless resume + # The element is a reference to another rule. Return the token + # if we still have one and continue with the referenced rule. + if token + sfi = token[2] + returnToken(token) + token = nil + else + sfi = nil + end + # This is where the recursion would happen. Instead, we push + # the state variables and then the next rule onto the + # recursion stack. + recursionStack.push([ rule, pattern, elementIdx, result, + repeatMode, sfi ]) + recursionStack.push(@rules[elToken]) + # Now terminate all but the outer loops without doing anything + # else. + recur = true + break + else + # We're back right after where the recursion started. Store + # the result and turn resume mode off again. + @stack.last.store(recursionResult, sfi) + resume = false + end + else + # In case the element is a keyword or variable we have to get a + # new token if we don't have one anymore. + token = getNextToken unless token + + processNormalElements(elType, elToken, token) + + # The token has been consumed. Reset the variable. + token = nil + @@expectedTokens = [] + end + elementIdx += 1 + end # of pattern while loop + + # Skip the rest of the loop in recur mode. + break if recur + + elementIdx = 0 + + # Once the complete pattern has been processed we call the + # processing function for this pattern to operate on the value + # array. Then pop the entry for this rule from the stack. The + # called function will use @val and @sourceFileInfo to retrieve + # data from the parser. + @val = @stack.last.val + @sourceFileInfo = @stack.last.sourceFileInfo + res = @stack.last.function ? @stack.last.function.call : nil + @stack.pop + + # If the rule is not repeatable we can store the result and break + # the outer loop to exit the function. + unless rule.repeatable + result = res + break + end + + # Otherwise we append the result to the result array and turn repeat + # mode on. + result << res + # We have completed the first iteration. Set the repeat mode flag to + # indicate that further iterations are already re-runs. + repeatMode = true + end # of rule processing loop + + if recur + recur = false + else + #Log.exit('parseRuleNR', "Finished rule #{rule.name}") + recursionResult = result + end + end while !recursionStack.empty? + + return result + end + def getNextToken begin token = nextToken - Log << "Token: [#{token[0]}][#{token[1]}]" + #Log << "Token: [#{token[0]}][#{token[1]}]" rescue TjException error('parse_rule', $!.message) end if @badVariables.include?(token[0]) error('unsupported_token', - "The token #{token[1]} is not supported in this context.") + "The token #{token[1]} is not supported in this context.", + nil, token[2]) end token + end + + def findPattern(rule, token, repeatMode) + # The scanner cannot differentiate between keywords and identifiers. So + # whenever an identifier is returned we have to see if we have a + # matching keyword first. If none is found, then look for normal + # identifiers. + if token[0] == 'ID' + if (patIdx = rule.matchingPatternIndex('_' + token[1])).nil? + patIdx = rule.matchingPatternIndex("$ID") + end + elsif token[0] == 'LITERAL' + patIdx = rule.matchingPatternIndex('_' + token[1]) + elsif token[0] == false + patIdx = rule.matchingPatternIndex('.') + else + patIdx = rule.matchingPatternIndex('$' + token[0]) + end + + # If no matching pattern is found for the token we have to check if the + # rule is optional or we are in repeat mode. If this is the case, return + # the token back to the scanner. Otherwise, we have found a token we + # cannot handle at this point. + if patIdx.nil? + # Append the list of expected tokens to the @@expectedToken array. + # This may be used in a later rule to provide more details when an + # error occured. + rule.transitions.each do |transition| + keys = transition.keys + keys.collect! { |key| key[1..-1] } + @@expectedTokens += keys + @@expectedTokens.sort! + end + + unless rule.optional?(@rules) || repeatMode + error('unexpctd_token', + (token[0] != false ? + "Unexpected token '#{token[1]}' of type " + + "'#{token[0]}'. " : + "Unexpected end of file in #{@scanner.fileName}. ") + + (@@expectedTokens.length > 1 ? + "Expecting one of #{@@expectedTokens.join(', ')}" : + "Expecting #{@@expectedTokens[0]}")) + end + returnToken(token) + return nil + end + + rule.pattern(patIdx) + end + + # Handle the elements that don't trigger a recursion. + def processNormalElements(elType, elToken, token) + if elType == ?_ + # If the element requires a keyword the token must match this + # keyword. + if elToken != token[1] + text = "'#{elToken}' expected but found " + + "'#{token[1]}' (#{token[0]})." + unless @@expectedTokens.empty? + text = "#{@@expectedTokens.join(', ')} or " + text + end + error('spec_keywork_expctd', text) + end + @stack.last.store(elToken, token[2]) + elsif elType == ?. + if token[0..1] != [ '.', '<END>' ] + error('end_expected', + "Found garbage at expected end of file: #{token[1]}\n" + + "If you see this in the middle of your file, you probably " + + "have closed your context too early.") + end + else + # The token must match the expected variable type. + if token[0] != elToken + text = "'#{elToken}' expected but found " + + "'#{token[1]}' (#{token[0]})." + unless @@expectedTokens.empty? + text = "#{@@expectedTokens.join(', ')} or " + text + end + error('spec_token_expctd', text) + end + # If the element is a variable store the value of the token. + @stack.last.store(token[1], token[2]) + end end end end