#!/usr/bin/env ruby -w # encoding: UTF-8 # # = TextParser.rb -- The TaskJuggler III Project Management Software # # Copyright (c) 2006, 2007, 2008, 2009, 2010 by Chris Schlaeger # # This program is free software; you can redistribute it and/or modify # it under the terms of version 2 of the GNU General Public License as # published by the Free Software Foundation. # require 'TextParser/Pattern' require 'TextParser/Rule' require 'TextParser/StackElement' require 'TjException' require 'Log' class TaskJuggler # The TextParser implements a regular LALR parser. But it uses a recursive # rule traversor instead of the more commonly found state machine generated by # yacc-like tools. Since stack depths is not really an issue with a Ruby # implementation this approach has one big advantage. The syntax of the parser # can be modified during parsing. This allows support for languages that can # extend themself. The TaskJuggler syntax is such an beast. Traditional yacc # generated parsers would fail with such a syntax. # # This class is just a base class. A complete parser would derive from this # class and implement the rule set and the functions _nextToken()_ and # _returnToken()_. It also needs to set the array _variables_ to declare all # variables ($SOMENAME) that the scanner may deliver. # # To describe the syntax the functions TextParser#pattern, TextParser#optional # and TextParser#repeatable can be used. When the rule set is changed during # parsing, TextParser#updateParserTables must be called to make the changes # effective. The parser can also document the syntax automatically. To # document a pattern, the functions TextParser#doc, TextParser#descr, # TextParser#also and TextParser#arg can be used. # # To start parsing the input the function TextParser#parse needs to be called # with the name of the start rule. class TextParser # Utility class so that we can distinguish Array results from the Array # containing the results of a repeatable rule. class TextParserResultArray < Array def initialize super end # If there is a repeatable rule that contains another repeatable loop, the # result of the inner rule is an Array that gets put into another Array by # the outer rule. In this case, the inner Array can be merged with the # outer Array. def <<(arg) if arg.is_a?(TextParserResultArray) self.concat(arg) else super end end end attr_reader :rules # Create a new TextParser object. def initialize @rules = { } # Array to hold the token types that the scanner can return. @variables = [] # An list of token types that are not allowed in the current context. @badVariables = [] # The currently processed rule. @cr = nil end # Limit the allowed tokens of the scanner to the subset passed by the # _tokenSet_ Array. def limitTokenSet(tokenSet) return unless tokenSet @badVariables = @variables.dup @badVariables.delete_if { |v| tokenSet.include?(v) } end # Call all methods that start with 'rule_' to initialize the rules. def initRules methods.each do |m| if m[0, 5] == 'rule_' # Create a new rule with the suffix of the function name as name. newRule(m[5..-1]) # Call the function. send(m) end end end # Add a new rule to the rule set. _name_ must be a unique identifier. The # function also sets the class variable @cr to the new rule. Subsequent # calls to TextParser#pattern, TextParser#optional or # TextParser#repeatable will then implicitely operate on the most recently # added rule. def newRule(name) raise "Fatal Error: Rule #{name} already exists" if @rules.has_key?(name) if block_given? saveCr = @cr @rules[name] = @cr = TextParser::Rule.new(name) yield @cr = saveCr else @rules[name] = @cr = TextParser::Rule.new(name) end end # Add a new pattern to the most recently added rule. _tokens_ is an array of # strings that specify the syntax elements of the pattern. Each token must # start with an character that identifies the type of the token. The # following types are supported. # # * ! a reference to another rule # * $ a variable token as delivered by the scanner # * _ a literal token. # # _func_ is a Proc object that is called whenever the parser has completed # the processing of this rule. def pattern(tokens, func = nil) @cr.addPattern(TextParser::Pattern.new(tokens, func)) end # Identify the patterns of the most recently added rule as optional syntax # elements. def optional @cr.setOptional end # Identify the patterns of the most recently added rule as repeatable syntax # elements. def repeatable @cr.setRepeatable end # This function needs to be called whenever new rules or patterns have been # added and before the next call to TextParser#parse. def updateParserTables @rules.each_value { |rule| rule.transitions = {} } @rules.each_value do |rule| getTransitions(rule) checkRule(rule) end end # To parse the input this function needs to be called with the name of the # rule to start with. It returns the result of the processing function of # the top-level parser rule that was specified by _ruleName_. def parse(ruleName) @stack = [] @@expectedTokens = [] updateParserTables begin result = parseRule(@rules[ruleName]) rescue TjException # error('parse_error', $!.message) return nil end result end # Return the SourceFileInfo of the TextScanner at the beginning of the # currently processed TextParser::Rule. Or return nil if we don't have a # current position. def sourceFileInfo return nil if @stack.empty? @stack.last.sourceFileInfo end def matchingRules(keyword) matches = [] @rules.each do |name, rule| patIdx = rule.matchingPatternIndex('_' + keyword) matches << [ rule, patIdx ] if patIdx end matches end def error(id, text, property = nil) @scanner.error(id, text, property) end def warning(id, text, property = nil) @scanner.warning(id, text, property) end private # getTransitions recursively determines all possible target tokens # that the _rule_ matches. A target token can either be a fixed token # (prefixed with _), a variable token (prefixed with $) or an end token # (just a .). The list of found target tokens is stored in the _transitions_ # list of the rule. For each rule pattern we store the transitions for this # pattern in a token -> rule hash. def getTransitions(rule) # If we have processed this rule before we can just return a copy # of the transitions of this rule. This avoids endless recursions. return rule.transitions.dup unless rule.transitions.empty? rule.transitions = [] rule.patterns.each do |pat| allTokensOptional = true transitions = { } pat.each do |token| tokenId = token[1..-1] if token[0] == ?! unless @rules.has_key?(tokenId) raise "Fatal Error: Unknown reference to #{tokenId} in pattern " + "#{pat} + of rule #{rule.name}" end refRule = @rules[tokenId] # If the referenced rule describes optional content, we need to look # at the next token as well. res = getTransitions(@rules[tokenId]) allTokensOptional = false unless refRule.optional?(@rules) # Combine the hashes for each pattern into a single hash res.each do |pat_i| pat_i.each { |tok, r| transitions[tok] = r } end optional = true if refRule.optional elsif '_$.'.include?(token[0]) transitions[token] = rule allTokensOptional = false else raise 'Fatal Error: Illegal token type specifier used for token' + ": #{tokenId}" end break unless allTokensOptional end # Make sure that we only have one possible transition for each # target. transitions.each do |key, value| rule.transitions.each do |trans| if trans.has_key?(key) raise "Fatal Error: Rule #{rule.name} has ambigeous " + "transitions for target #{key}" end end end rule.transitions << transitions end rule.transitions.dup end def checkRule(rule) if rule.patterns.empty? raise "Rule #{rule.name} must have at least one pattern" end rule.patterns.each do |pat| pat.each do |tok| type = tok[0] token = tok[1..-1] if type == ?$ if @variables.index(token).nil? error('unsupported_token', "The token #{token} is not supported here.") end elsif type == ?! if @rules[token].nil? raise "Fatal Error: Reference to unknown rule #{token} in " + "pattern '#{pat}' of rule #{rule.name}" end end end end end # This function processes the input starting with the syntax description of # _rule_. It recursively calls this function whenever the syntax description # contains the reference to another rule. def parseRule(rule) Log.enter('parseRule', "Parsing with rule #{rule.name}") result = rule.repeatable ? TextParserResultArray.new : nil # Rules can be marked 'repeatable'. This flag will be set to true after # the first iteration has been completed. repeatMode = false loop do # At the beginning of a rule we need a token from the input to determine # which pattern of the rule needs to be processed. token = getNextToken # The scanner cannot differentiate between keywords and identifiers. So # whenever an identifier is returned we have to see if we have a # matching keyword first. If none is found, then look for normal # identifiers. if token[0] == 'ID' if (patIdx = rule.matchingPatternIndex('_' + token[1])).nil? patIdx = rule.matchingPatternIndex("$ID") end elsif token[0] == 'LITERAL' patIdx = rule.matchingPatternIndex('_' + token[1]) elsif token[0] == false patIdx = rule.matchingPatternIndex('.') else patIdx = rule.matchingPatternIndex('$' + token[0]) end # If no matching pattern is found for the token we have to check if the # rule is optional or we are in repeat mode. If this is the case, return # the token back to the scanner. Otherwise we have found a token we # cannot handle at this point. if patIdx.nil? # Append the list of expected tokens to the @@expectedToken array. # This may be used in a later rule to provide more details when an # error occured. rule.transitions.each do |transition| keys = transition.keys keys.collect! { |key| key[1..-1] } @@expectedTokens += keys @@expectedTokens.sort! end unless rule.optional?(@rules) || repeatMode error('unexpctd_token', (token[0] != false ? "Unexpected token '#{token[1]}' of type '#{token[0]}'. " : "Unexpected end of file in #{@scanner.fileName}. ") + (@@expectedTokens.length > 1 ? "Expecting one of #{@@expectedTokens.join(', ')}" : "Expecting #{@@expectedTokens[0]}")) end returnToken(token) Log.exit('parseRule', "Finished rule #{rule.name}") return result end pattern = rule.pattern(patIdx) @stack << TextParser::StackElement.new(rule, pattern.function, @scanner.sourceFileInfo) pattern.each do |element| # Separate the type and token text for pattern element. elType = element[0] elToken = element[1..-1] if elType == ?! # The element is a reference to another rule. Return the token if we # still have one and continue with the referenced rule. unless token.nil? returnToken(token) token = nil end @stack.last.store(parseRule(@rules[elToken]), @scanner.sourceFileInfo) else # In case the element is a keyword or variable we have to get a new # token if we don't have one anymore. if token.nil? token = getNextToken end if elType == ?_ # If the element requires a keyword the token must match this # keyword. if elToken != token[1] text = "'#{elToken}' expected but found " + "'#{token[1]}' (#{token[0]})." unless @@expectedTokens.empty? text = "#{@@expectedTokens.join(', ')} or " + text end error('spec_keywork_expctd', text) end @stack.last.store(elToken, @scanner.sourceFileInfo) elsif elType == ?. if token != [ '.', '' ] error('end_expected', 'End expected but found ' + "'#{token[1]}' (#{token[0]}).") end else # The token must match the expected variable type. if token[0] != elToken text = "'#{elToken}' expected but found " + "'#{token[1]}' (#{token[0]})." unless @@expectedTokens.empty? text = "#{@@expectedTokens.join(', ')} or " + text end error('spec_token_expctd', text) end # If the element is a variable store the value of the token. @stack.last.store(token[1], @scanner.sourceFileInfo) end # The token has been consumed. Reset the variable. token = nil @@expectedTokens = [] end end # Once the complete pattern has been processed we call the processing # function for this pattern to operate on the value array. Then pop the # entry for this rule from the stack. @val = @stack.last.val res = nil res = @stack.last.function.call unless @stack.last.function.nil? @stack.pop # If the rule is not repeatable we can store the result and break the # outer loop to exit the function. unless rule.repeatable result = res break end # Otherwise we append the result to the result array and turn repeat # mode on. result << res repeatMode = true end Log.exit('parseRule', "Finished rule #{rule.name}") return result end def getNextToken begin token = nextToken Log << "Token: [#{token[0]}][#{token[1]}]" rescue TjException error('parse_rule', $!.message) end if @badVariables.include?(token[0]) error('unsupported_token', "The token #{token[1]} is not supported in this context.") end token end end end