/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ #pragma once #include "ANTLRErrorStrategy.h" #include "misc/IntervalSet.h" namespace antlr4 { /** * This is the default implementation of {@link ANTLRErrorStrategy} used for * error reporting and recovery in ANTLR parsers. */ class ANTLR4CPP_PUBLIC DefaultErrorStrategy : public ANTLRErrorStrategy { public: DefaultErrorStrategy(); DefaultErrorStrategy(DefaultErrorStrategy const& other) = delete; virtual ~DefaultErrorStrategy(); DefaultErrorStrategy& operator = (DefaultErrorStrategy const& other) = delete; protected: /** * Indicates whether the error strategy is currently "recovering from an * error". This is used to suppress reporting multiple error messages while * attempting to recover from a detected syntax error. * * @see #inErrorRecoveryMode */ bool errorRecoveryMode; /** The index into the input stream where the last error occurred. * This is used to prevent infinite loops where an error is found * but no token is consumed during recovery...another error is found, * ad nauseum. This is a failsafe mechanism to guarantee that at least * one token/tree node is consumed for two errors. */ int lastErrorIndex; misc::IntervalSet lastErrorStates; /// /// {@inheritDoc} ///

/// The default implementation simply calls to /// ensure that the handler is not in error recovery mode. ///

public: virtual void reset(Parser *recognizer) override; /// /// This method is called to enter error recovery mode when a recognition /// exception is reported. /// /// the parser instance protected: virtual void beginErrorCondition(Parser *recognizer); /// /// {@inheritDoc} /// public: virtual bool inErrorRecoveryMode(Parser *recognizer) override; /// /// This method is called to leave error recovery mode after recovering from /// a recognition exception. /// /// protected: virtual void endErrorCondition(Parser *recognizer); /// /// {@inheritDoc} ///

/// The default implementation simply calls . ///

public: virtual void reportMatch(Parser *recognizer) override; /// {@inheritDoc} ///

/// The default implementation returns immediately if the handler is already /// in error recovery mode. Otherwise, it calls /// and dispatches the reporting task based on the runtime type of {@code e} /// according to the following table. /// ///

virtual void reportError(Parser *recognizer, const RecognitionException &e) override; /// /// {@inheritDoc} ///

/// The default implementation resynchronizes the parser by consuming tokens /// until we find one in the resynchronization set--loosely the set of tokens /// that can follow the current rule. ///

virtual void recover(Parser *recognizer, std::exception_ptr e) override; /** * The default implementation of {@link ANTLRErrorStrategy#sync} makes sure * that the current lookahead symbol is consistent with what were expecting * at this point in the ATN. You can call this anytime but ANTLR only * generates code to check before subrules/loops and each iteration. * *

Implements Jim Idle's magic sync mechanism in closures and optional * subrules. E.g.,

* *
     * a : sync ( stuff sync )* ;
     * sync : {consume to what can follow sync} ;
     * 
* * At the start of a sub rule upon error, {@link #sync} performs single * token deletion, if possible. If it can't do that, it bails on the current * rule and uses the default error recovery, which consumes until the * resynchronization set of the current rule. * *

If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block * with an empty alternative), then the expected set includes what follows * the subrule.

* *

During loop iteration, it consumes until it sees a token that can start a * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to * stay in the loop as long as possible.

* *

ORIGINS

* *

Previous versions of ANTLR did a poor job of their recovery within loops. * A single mismatch token or missing token would force the parser to bail * out of the entire rules surrounding the loop. So, for rule

* *
     * classDef : 'class' ID '{' member* '}'
     * 
* * input with an extra token between members would force the parser to * consume until it found the next class definition rather than the next * member definition of the current class. * *

This functionality cost a little bit of effort because the parser has to * compare token set at the start of the loop and at each iteration. If for * some reason speed is suffering for you, you can turn off this * functionality by simply overriding this method as a blank { }.

*/ virtual void sync(Parser *recognizer) override; /// /// This is called by when the exception is a /// . /// /// /// the parser instance /// the recognition exception protected: virtual void reportNoViableAlternative(Parser *recognizer, const NoViableAltException &e); /// /// This is called by when the exception is an /// . /// /// /// the parser instance /// the recognition exception virtual void reportInputMismatch(Parser *recognizer, const InputMismatchException &e); /// /// This is called by when the exception is a /// . /// /// /// the parser instance /// the recognition exception virtual void reportFailedPredicate(Parser *recognizer, const FailedPredicateException &e); /** * This method is called to report a syntax error which requires the removal * of a token from the input stream. At the time this method is called, the * erroneous symbol is current {@code LT(1)} symbol and has not yet been * removed from the input stream. When this method returns, * {@code recognizer} is in error recovery mode. * *

This method is called when {@link #singleTokenDeletion} identifies * single-token deletion as a viable recovery strategy for a mismatched * input error.

* *

The default implementation simply returns if the handler is already in * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to * enter error recovery mode, followed by calling * {@link Parser#notifyErrorListeners}.

* * @param recognizer the parser instance */ virtual void reportUnwantedToken(Parser *recognizer); /** * This method is called to report a syntax error which requires the * insertion of a missing token into the input stream. At the time this * method is called, the missing token has not yet been inserted. When this * method returns, {@code recognizer} is in error recovery mode. * *

This method is called when {@link #singleTokenInsertion} identifies * single-token insertion as a viable recovery strategy for a mismatched * input error.

* *

The default implementation simply returns if the handler is already in * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to * enter error recovery mode, followed by calling * {@link Parser#notifyErrorListeners}.

* * @param recognizer the parser instance */ virtual void reportMissingToken(Parser *recognizer); public: /** * {@inheritDoc} * *

The default implementation attempts to recover from the mismatched input * by using single token insertion and deletion as described below. If the * recovery attempt fails, this method throws an * {@link InputMismatchException}.

* *

EXTRA TOKEN (single token deletion)

* *

{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the * right token, however, then assume {@code LA(1)} is some extra spurious * token and delete it. Then consume and return the next token (which was * the {@code LA(2)} token) as the successful result of the match operation.

* *

This recovery strategy is implemented by {@link #singleTokenDeletion}.

* *

MISSING TOKEN (single token insertion)

* *

If current token (at {@code LA(1)}) is consistent with what could come * after the expected {@code LA(1)} token, then assume the token is missing * and use the parser's {@link TokenFactory} to create it on the fly. The * "insertion" is performed by returning the created token as the successful * result of the match operation.

* *

This recovery strategy is implemented by {@link #singleTokenInsertion}.

* *

EXAMPLE

* *

For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When * the parser returns from the nested call to {@code expr}, it will have * call chain:

* *
     * stat → expr → atom
     * 
* * and it will be trying to match the {@code ')'} at this point in the * derivation: * *
     * => ID '=' '(' INT ')' ('+' atom)* ';'
     *                    ^
     * 
* * The attempt to match {@code ')'} will fail when it sees {@code ';'} and * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} * is in the set of tokens that can follow the {@code ')'} token reference * in rule {@code atom}. It can assume that you forgot the {@code ')'}. */ virtual Token* recoverInline(Parser *recognizer) override; /// /// This method implements the single-token insertion inline error recovery /// strategy. It is called by if the single-token /// deletion strategy fails to recover from the mismatched input. If this /// method returns {@code true}, {@code recognizer} will be in error recovery /// mode. ///

/// This method determines whether or not single-token insertion is viable by /// checking if the {@code LA(1)} input symbol could be successfully matched /// if it were instead the {@code LA(2)} symbol. If this method returns /// {@code true}, the caller is responsible for creating and inserting a /// token with the correct type to produce this behavior. ///

/// the parser instance /// {@code true} if single-token insertion is a viable recovery /// strategy for the current mismatched input, otherwise {@code false} protected: virtual bool singleTokenInsertion(Parser *recognizer); /// /// This method implements the single-token deletion inline error recovery /// strategy. It is called by to attempt to recover /// from mismatched input. If this method returns null, the parser and error /// handler state will not have changed. If this method returns non-null, /// {@code recognizer} will not be in error recovery mode since the /// returned token was a successful match. ///

/// If the single-token deletion is successful, this method calls /// to report the error, followed by /// to actually "delete" the extraneous token. Then, /// before returning is called to signal a successful /// match. ///

/// the parser instance /// the successfully matched instance if single-token /// deletion successfully recovers from the mismatched input, otherwise /// {@code null} virtual Token* singleTokenDeletion(Parser *recognizer); /// /// Conjure up a missing token during error recovery. /// /// The recognizer attempts to recover from single missing /// symbols. But, actions might refer to that missing symbol. /// For example, x=ID {f($x);}. The action clearly assumes /// that there has been an identifier matched previously and that /// $x points at that token. If that token is missing, but /// the next token in the stream is what we want we assume that /// this token is missing and we keep going. Because we /// have to return some token to replace the missing token, /// we have to conjure one up. This method gives the user control /// over the tokens returned for missing tokens. Mostly, /// you will want to create something special for identifier /// tokens. For literals such as '{' and ',', the default /// action in the parser or tree parser works. It simply creates /// a CommonToken of the appropriate type. The text will be the token. /// If you change what tokens must be created by the lexer, /// override this method to create the appropriate tokens. /// virtual Token* getMissingSymbol(Parser *recognizer); virtual misc::IntervalSet getExpectedTokens(Parser *recognizer); /// /// How should a token be displayed in an error message? The default /// is to display just the text, but during development you might /// want to have a lot of information spit out. Override in that case /// to use t.toString() (which, for CommonToken, dumps everything about /// the token). This is better than forcing you to override a method in /// your token objects because you don't have to go modify your lexer /// so that it creates a new class. /// virtual std::string getTokenErrorDisplay(Token *t); virtual std::string getSymbolText(Token *symbol); virtual size_t getSymbolType(Token *symbol); virtual std::string escapeWSAndQuote(const std::string &s) const; /* Compute the error recovery set for the current rule. During * rule invocation, the parser pushes the set of tokens that can * follow that rule reference on the stack; this amounts to * computing FIRST of what follows the rule reference in the * enclosing rule. See LinearApproximator.FIRST(). * This local follow set only includes tokens * from within the rule; i.e., the FIRST computation done by * ANTLR stops at the end of a rule. * * EXAMPLE * * When you find a "no viable alt exception", the input is not * consistent with any of the alternatives for rule r. The best * thing to do is to consume tokens until you see something that * can legally follow a call to r *or* any rule that called r. * You don't want the exact set of viable next tokens because the * input might just be missing a token--you might consume the * rest of the input looking for one of the missing tokens. * * Consider grammar: * * a : '[' b ']' * | '(' b ')' * ; * b : c '^' INT ; * c : ID * | INT * ; * * At each rule invocation, the set of tokens that could follow * that rule is pushed on a stack. Here are the various * context-sensitive follow sets: * * FOLLOW(b1_in_a) = FIRST(']') = ']' * FOLLOW(b2_in_a) = FIRST(')') = ')' * FOLLOW(c_in_b) = FIRST('^') = '^' * * Upon erroneous input "[]", the call chain is * * a -> b -> c * * and, hence, the follow context stack is: * * depth follow set start of rule execution * 0 a (from main()) * 1 ']' b * 2 '^' c * * Notice that ')' is not included, because b would have to have * been called from a different context in rule a for ')' to be * included. * * For error recovery, we cannot consider FOLLOW(c) * (context-sensitive or otherwise). We need the combined set of * all context-sensitive FOLLOW sets--the set of all tokens that * could follow any reference in the call chain. We need to * resync to one of those tokens. Note that FOLLOW(c)='^' and if * we resync'd to that token, we'd consume until EOF. We need to * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}. * In this case, for input "[]", LA(1) is ']' and in the set, so we would * not consume anything. After printing an error, rule c would * return normally. Rule b would not find the required '^' though. * At this point, it gets a mismatched token error and throws an * exception (since LA(1) is not in the viable following token * set). The rule exception handler tries to recover, but finds * the same recovery set and doesn't consume anything. Rule b * exits normally returning to rule a. Now it finds the ']' (and * with the successful match exits errorRecovery mode). * * So, you can see that the parser walks up the call chain looking * for the token that was a member of the recovery set. * * Errors are not generated in errorRecovery mode. * * ANTLR's error recovery mechanism is based upon original ideas: * * "Algorithms + Data Structures = Programs" by Niklaus Wirth * * and * * "A note on error recovery in recursive descent parsers": * http://portal.acm.org/citation.cfm?id=947902.947905 * * Later, Josef Grosch had some good ideas: * * "Efficient and Comfortable Error Recovery in Recursive Descent * Parsers": * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip * * Like Grosch I implement context-sensitive FOLLOW sets that are combined * at run-time upon error to avoid overhead during parsing. */ virtual misc::IntervalSet getErrorRecoverySet(Parser *recognizer); /// /// Consume tokens until one matches the given token set. virtual void consumeUntil(Parser *recognizer, const misc::IntervalSet &set); private: std::vector> _errorSymbols; // Temporarily created token. void InitializeInstanceFields(); }; } // namespace antlr4