#!/usr/bin/env ruby -w
# encoding: UTF-8
#
# = TextParser.rb -- The TaskJuggler III Project Management Software
#
# Copyright (c) 2006, 2007, 2008, 2009, 2010 by Chris Schlaeger <cs@kde.org>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#

require 'TextParser/Pattern'
require 'TextParser/Rule'
require 'TextParser/StackElement'
require 'TjException'
require 'Log'

class TaskJuggler

  # The TextParser implements a regular LALR parser. But it uses a recursive
  # rule traversor instead of the more commonly found state machine generated by
  # yacc-like tools. Since stack depths is not really an issue with a Ruby
  # implementation this approach has one big advantage. The syntax of the parser
  # can be modified during parsing. This allows support for languages that can
  # extend themself. The TaskJuggler syntax is such an beast. Traditional yacc
  # generated parsers would fail with such a syntax.
  #
  # This class is just a base class. A complete parser would derive from this
  # class and implement the rule set and the functions _nextToken()_ and
  # _returnToken()_. It also needs to set the array _variables_ to declare all
  # variables ($SOMENAME) that the scanner may deliver.
  #
  # To describe the syntax the functions TextParser#pattern, TextParser#optional
  # and TextParser#repeatable can be used. When the rule set is changed during
  # parsing, TextParser#updateParserTables must be called to make the changes
  # effective. The parser can also document the syntax automatically. To
  # document a pattern, the functions TextParser#doc, TextParser#descr,
  # TextParser#also and TextParser#arg can be used.
  #
  # To start parsing the input the function TextParser#parse needs to be called
  # with the name of the start rule.
  class TextParser

    # Utility class so that we can distinguish Array results from the Array
    # containing the results of a repeatable rule.
    class TextParserResultArray < Array

      def initialize
        super
      end

      # If there is a repeatable rule that contains another repeatable loop, the
      # result of the inner rule is an Array that gets put into another Array by
      # the outer rule. In this case, the inner Array can be merged with the
      # outer Array.
      def <<(arg)
        if arg.is_a?(TextParserResultArray)
          self.concat(arg)
        else
          super
        end
      end

    end

    attr_reader :rules

    # Create a new TextParser object.
    def initialize
      @rules = { }
      # Array to hold the token types that the scanner can return.
      @variables = []
      # An list of token types that are not allowed in the current context.
      @badVariables = []
      # The currently processed rule.
      @cr = nil
    end

    # Limit the allowed tokens of the scanner to the subset passed by the
    # _tokenSet_ Array.
    def limitTokenSet(tokenSet)
      return unless tokenSet

      @badVariables = @variables.dup
      @badVariables.delete_if { |v| tokenSet.include?(v) }
    end

    # Call all methods that start with 'rule_' to initialize the rules.
    def initRules
      methods.each do |m|
        if m[0, 5] == 'rule_'
          # Create a new rule with the suffix of the function name as name.
          newRule(m[5..-1])
          # Call the function.
          send(m)
        end
      end
    end

    # Add a new rule to the rule set. _name_ must be a unique identifier. The
    # function also sets the class variable @cr to the new rule. Subsequent
    # calls to TextParser#pattern, TextParser#optional or
    # TextParser#repeatable will then implicitely operate on the most recently
    # added rule.
    def newRule(name)
      raise "Fatal Error: Rule #{name} already exists" if @rules.has_key?(name)

      if block_given?
        saveCr = @cr
        @rules[name] = @cr = TextParser::Rule.new(name)
        yield
        @cr = saveCr
      else
        @rules[name] = @cr = TextParser::Rule.new(name)
      end
    end

    # Add a new pattern to the most recently added rule. _tokens_ is an array of
    # strings that specify the syntax elements of the pattern. Each token must
    # start with an character that identifies the type of the token. The
    # following types are supported.
    #
    # * ! a reference to another rule
    # * $ a variable token as delivered by the scanner
    # * _ a literal token.
    #
    # _func_ is a Proc object that is called whenever the parser has completed
    # the processing of this rule.
    def pattern(tokens, func = nil)
      @cr.addPattern(TextParser::Pattern.new(tokens, func))
    end

    # Identify the patterns of the most recently added rule as optional syntax
    # elements.
    def optional
      @cr.setOptional
    end

    # Identify the patterns of the most recently added rule as repeatable syntax
    # elements.
    def repeatable
      @cr.setRepeatable
    end

    # This function needs to be called whenever new rules or patterns have been
    # added and before the next call to TextParser#parse.
    def updateParserTables
      @rules.each_value { |rule| rule.transitions = {} }
      @rules.each_value do |rule|
        getTransitions(rule)
        checkRule(rule)
      end
    end

    # To parse the input this function needs to be called with the name of the
    # rule to start with. It returns the result of the processing function of
    # the top-level parser rule that was specified by _ruleName_.
    def parse(ruleName)
      @stack = []
      @@expectedTokens = []
      updateParserTables
      begin
        result = parseRuleR(@rules[ruleName])
      rescue TjException
        # error('parse_error', $!.message)
        return nil
      end

      result
    end

    # Return the SourceFileInfo of the TextScanner at the beginning of the
    # currently processed TextParser::Rule. Or return nil if we don't have a
    # current position.
    def sourceFileInfo
      return nil if @stack.empty?
      @stack.last.sourceFileInfo[0]
    end

    def matchingRules(keyword)
      matches = []
      @rules.each do |name, rule|
        patIdx = rule.matchingPatternIndex('_' + keyword)
        matches << [ rule, patIdx ] if patIdx
      end
      matches
    end

    def error(id, text, property = nil, sfi = nil)
      if @scanner
        @scanner.error(id, text, property, sfi)
      else
        message = Message.new(id, 'error', text, property, sfi)
        @messageHandler.send(message)
        raise TjException.new, ''
      end
    end

    def warning(id, text, property = nil, sfi = nil)
      @scanner.warning(id, text, property, sfi)
    end

  private

    # getTransitions recursively determines all possible target tokens
    # that the _rule_ matches. A target token can either be a fixed token
    # (prefixed with _), a variable token (prefixed with $) or an end token
    # (just a .). The list of found target tokens is stored in the _transitions_
    # list of the rule. For each rule pattern we store the transitions for this
    # pattern in a token -> rule hash.
    def getTransitions(rule)
      # If we have processed this rule before we can just return a copy
      # of the transitions of this rule. This avoids endless recursions.
      return rule.transitions.dup unless rule.transitions.empty?

      rule.transitions = []
      rule.patterns.each do |pat|
        allTokensOptional = true
        transitions = { }
        pat.each do |token|
          tokenId = token[1..-1]
          if token[0] == ?!
            unless @rules.has_key?(tokenId)
              raise "Fatal Error: Unknown reference to #{tokenId} in pattern " +
                    "#{pat} + of rule #{rule.name}"
            end
            refRule = @rules[tokenId]
            # If the referenced rule describes optional content, we need to look
            # at the next token as well.
            res = getTransitions(@rules[tokenId])
            allTokensOptional = false unless refRule.optional?(@rules)
            # Combine the hashes for each pattern into a single hash
            res.each do |pat_i|
              pat_i.each { |tok, r| transitions[tok] = r }
            end
            optional = true if refRule.optional
          elsif '_$.'.include?(token[0])
            transitions[token] = rule
            allTokensOptional = false
          else
            raise 'Fatal Error: Illegal token type specifier used for token' +
                  ": #{tokenId}"
          end
          break unless allTokensOptional
        end
        # Make sure that we only have one possible transition for each
        # target.
        transitions.each do |key, value|
          rule.transitions.each do |trans|
            if trans.has_key?(key)
              raise "Fatal Error: Rule #{rule.name} has ambiguous " +
                    "transitions for target #{key}"
            end
          end
        end
        rule.transitions << transitions
      end
      rule.transitions.dup
    end

    def checkRule(rule)
      if rule.patterns.empty?
        raise "Rule #{rule.name} must have at least one pattern"
      end

      rule.patterns.each do |pat|
        pat.each do |tok|
          type = tok[0]
          token = tok[1..-1]
          if type == ?$
            if @variables.index(token).nil?
              error('unsupported_token',
                    "The token #{token} is not supported here.", nil,
                    token[2])
            end
          elsif type == ?!
            if @rules[token].nil?
              raise "Fatal Error: Reference to unknown rule #{token} in " +
                    "pattern '#{pat}' of rule #{rule.name}"
            end
          end
        end
      end
    end

    # This function processes the input starting with the syntax description of
    # _rule_. It recursively calls this function whenever the syntax description
    # contains the reference to another rule.
    # This recursive version has cleaner code and is about 8% faster than
    # parseRuleNR.
    def parseRuleR(rule)
      #Log.enter('parseRuleR', "Parsing with rule #{rule.name}")
      result = rule.repeatable ? TextParserResultArray.new : nil
      # Rules can be marked 'repeatable'. This flag will be set to true after
      # the first iteration has been completed.
      repeatMode = false
      loop do
        # At the beginning of a rule we need a token from the input to determine
        # which pattern of the rule needs to be processed.
        token = getNextToken

        return result unless (pattern = findPattern(rule, token, repeatMode))
        # The @stack will store the resulting value of each element in the
        # pattern.
        @stack << TextParser::StackElement.new(pattern.function)

        pattern.each do |element|
          # Separate the type and token text for pattern element.
          elType = element[0]
          elToken = element[1..-1]
          if elType == ?!
            # The element is a reference to another rule. Return the token if
            # we still have one and continue with the referenced rule.
            unless token.nil?
              sfi = token[2]
              returnToken(token)
              token = nil
            else
              sfi = nil
            end
            @stack.last.store(parseRuleR(@rules[elToken]), sfi)
            #Log << "Resuming rule #{rule.name}"
          else
            # In case the element is a keyword or variable we have to get a new
            # token if we don't have one anymore.
            token = getNextToken unless token

            processNormalElements(elType, elToken, token)

            # The token has been consumed. Reset the variable.
            token = nil
            @@expectedTokens = []
          end
        end

        # Once the complete pattern has been processed we call the processing
        # function for this pattern to operate on the value array. Then pop the
        # entry for this rule from the stack.
        @val = @stack.last.val
        @sourceFileInfo = @stack.last.sourceFileInfo
        res = nil
        res = @stack.last.function.call unless @stack.last.function.nil?
        @stack.pop

        # If the rule is not repeatable we can store the result and break the
        # outer loop to exit the function.
        unless rule.repeatable
          result = res
          break
        end

        # Otherwise we append the result to the result array and turn repeat
        # mode on.
        result << res
        repeatMode = true
      end

      #Log.exit('parseRuleR', "Finished rule #{rule.name}")
      return result
    end

    # This function processes the input starting with the syntax description
    # of _rule_. It's implemented as an unrolled recursion.  It recursively
    # iterates over the rule tree as controlled by the input file.
    # This version is not limited by the size of the system stack. So far, I'm
    # not aware of any project that is too large for the system stack. Since
    # the recursive version parseRuleR is about 8% faster and has cleaner
    # code, we use that by default.
    def parseRuleNR(rule)
      elementIdx = 0
      recursionResult = nil
      # These flags are used to managed the control flow to and from the
      # recursion point.
      recur = resume = false
      # The stack that holds the context for the recursion levels. It's either
      # just a rule to start a new recursion or an Array of state variables.
      recursionStack = [ rule ]
      begin
        # Pop the top entry from the recursion stack.
        se = recursionStack.pop
        if se.is_a?(Array)
          # We have essentially finished a recursion level and need to get
          # back to the place where we started the recursion. First, we need
          # to restore the state again.
          rule, pattern, elementIdx, result, repeatMode, sfi = se
          #Log << "Recursion loop started in resume mode for rule #{rule.name}"
          # Now jump to the recursion point without doing anything else.
          resume = true
        else
          # Start a new recursion level. The rule tells us how to interpret
          # the input text.
          rule = se
          #Log.enter('parseRuleNR', "Parsing with rule #{rule.name}")
          resume = false
        end

        unless resume
          result = rule.repeatable ? TextParserResultArray.new : nil
          # Rules can be marked 'repeatable'. This flag will be set to true
          # after the first iteration has been completed.
          repeatMode = false
        end

        loop do
          unless resume
            # At the beginning of a rule we need a token from the input to
            # determine which pattern of the rule needs to be processed.
            token = getNextToken

            break unless (pattern = findPattern(rule, token, repeatMode))
            # The @stack will store the resulting value of each element in the
            # pattern.
            @stack << TextParser::StackElement.new(pattern.function)

            # Once we've found the right pattern, we need to process each
            # element.
            elementIdx = 0
          end

          elementCount = pattern.length
          while elementIdx < elementCount
            element = pattern[elementIdx]
            # Separate the type and token text for pattern element.
            elType = element[0]
            elToken = element[1..-1]
            if elType == ?!
              unless resume
                # The element is a reference to another rule. Return the token
                # if we still have one and continue with the referenced rule.
                if token
                  sfi = token[2]
                  returnToken(token)
                  token = nil
                else
                  sfi = nil
                end
                # This is where the recursion would happen. Instead, we push
                # the state variables and then the next rule onto the
                # recursion stack.
                recursionStack.push([ rule, pattern, elementIdx, result,
                                      repeatMode, sfi ])
                recursionStack.push(@rules[elToken])
                # Now terminate all but the outer loops without doing anything
                # else.
                recur = true
                break
              else
                # We're back right after where the recursion started. Store
                # the result and turn resume mode off again.
                @stack.last.store(recursionResult, sfi)
                resume = false
              end
            else
              # In case the element is a keyword or variable we have to get a
              # new token if we don't have one anymore.
              token = getNextToken unless token

              processNormalElements(elType, elToken, token)

              # The token has been consumed. Reset the variable.
              token = nil
              @@expectedTokens = []
            end
            elementIdx += 1
          end # of pattern while loop

          # Skip the rest of the loop in recur mode.
          break if recur

          elementIdx = 0

          # Once the complete pattern has been processed we call the
          # processing function for this pattern to operate on the value
          # array. Then pop the entry for this rule from the stack. The
          # called function will use @val and @sourceFileInfo to retrieve
          # data from the parser.
          @val = @stack.last.val
          @sourceFileInfo = @stack.last.sourceFileInfo
          res = @stack.last.function ? @stack.last.function.call : nil
          @stack.pop

          # If the rule is not repeatable we can store the result and break
          # the outer loop to exit the function.
          unless rule.repeatable
            result = res
            break
          end

          # Otherwise we append the result to the result array and turn repeat
          # mode on.
          result << res
          # We have completed the first iteration. Set the repeat mode flag to
          # indicate that further iterations are already re-runs.
          repeatMode = true
        end # of rule processing loop

        if recur
          recur = false
        else
          #Log.exit('parseRuleNR', "Finished rule #{rule.name}")
          recursionResult = result
        end
      end while !recursionStack.empty?

      return result
    end

    def getNextToken
      begin
        token = nextToken
        #Log << "Token: [#{token[0]}][#{token[1]}]"
      rescue TjException
        error('parse_rule', $!.message)
      end
      if @badVariables.include?(token[0])
        error('unsupported_token',
              "The token #{token[1]} is not supported in this context.",
              nil, token[2])
      end
      token
    end

    def findPattern(rule, token, repeatMode)
      # The scanner cannot differentiate between keywords and identifiers.  So
      # whenever an identifier is returned we have to see if we have a
      # matching keyword first. If none is found, then look for normal
      # identifiers.
      if token[0] == 'ID'
        if (patIdx = rule.matchingPatternIndex('_' + token[1])).nil?
          patIdx = rule.matchingPatternIndex("$ID")
        end
      elsif token[0] == 'LITERAL'
        patIdx = rule.matchingPatternIndex('_' + token[1])
      elsif token[0] == false
        patIdx = rule.matchingPatternIndex('.')
      else
        patIdx = rule.matchingPatternIndex('$' + token[0])
      end

      # If no matching pattern is found for the token we have to check if the
      # rule is optional or we are in repeat mode. If this is the case, return
      # the token back to the scanner. Otherwise, we have found a token we
      # cannot handle at this point.
      if patIdx.nil?
        # Append the list of expected tokens to the @@expectedToken array.
        # This may be used in a later rule to provide more details when an
        # error occured.
        rule.transitions.each do |transition|
          keys = transition.keys
          keys.collect! { |key| key[1..-1] }
          @@expectedTokens += keys
          @@expectedTokens.sort!
        end

        unless rule.optional?(@rules) || repeatMode
          error('unexpctd_token',
                (token[0] != false ?
                 "Unexpected token '#{token[1]}' of type " +
                 "'#{token[0]}'. " :
                 "Unexpected end of file in #{@scanner.fileName}. ") +
                (@@expectedTokens.length > 1 ?
                 "Expecting one of #{@@expectedTokens.join(', ')}" :
                 "Expecting #{@@expectedTokens[0]}"))
        end
        returnToken(token)
        return nil
      end

      rule.pattern(patIdx)
    end

    # Handle the elements that don't trigger a recursion.
    def processNormalElements(elType, elToken, token)
      if elType == ?_
        # If the element requires a keyword the token must match this
        # keyword.
        if elToken != token[1]
          text = "'#{elToken}' expected but found " +
                 "'#{token[1]}' (#{token[0]})."
          unless @@expectedTokens.empty?
            text = "#{@@expectedTokens.join(', ')} or " + text
          end
          error('spec_keywork_expctd', text)
        end
        @stack.last.store(elToken, token[2])
      elsif elType == ?.
        if token[0..1] != [ '.', '<END>' ]
          error('end_expected',
                "Found garbage at expected end of file: #{token[1]}\n" +
                "If you see this in the middle of your file, you probably " +
                "have closed your context too early.")
        end
      else
        # The token must match the expected variable type.
        if token[0] != elToken
          text = "'#{elToken}' expected but found " +
                 "'#{token[1]}' (#{token[0]})."
          unless @@expectedTokens.empty?
            text = "#{@@expectedTokens.join(', ')} or " + text
          end
          error('spec_token_expctd', text)
        end
        # If the element is a variable store the value of the token.
        @stack.last.store(token[1], token[2])
      end
    end

  end

end