/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "ANTLRErrorStrategy.h"
#include "misc/IntervalSet.h"
namespace antlr4 {
/**
* This is the default implementation of {@link ANTLRErrorStrategy} used for
* error reporting and recovery in ANTLR parsers.
*/
class ANTLR4CPP_PUBLIC DefaultErrorStrategy : public ANTLRErrorStrategy {
public:
/** Default constructor (declared here, defined out of line). */
DefaultErrorStrategy();
/** Copy construction is disabled: an error strategy instance cannot be copied. */
DefaultErrorStrategy(DefaultErrorStrategy const& other) = delete;
/** Virtual destructor so instances can be destroyed through a base-class pointer. */
virtual ~DefaultErrorStrategy();
/** Copy assignment is disabled, matching the deleted copy constructor. */
DefaultErrorStrategy& operator = (DefaultErrorStrategy const& other) = delete;
protected:
/**
* Indicates whether the error strategy is currently "recovering from an
* error". This is used to suppress reporting multiple error messages while
* attempting to recover from a detected syntax error.
*
* @see #inErrorRecoveryMode
*/
bool errorRecoveryMode;
/**
* The index into the input stream where the last error occurred.
* This is used to prevent infinite loops where an error is found
* but no token is consumed during recovery...another error is found,
* ad nauseam. This is a failsafe mechanism to guarantee that at least
* one token/tree node is consumed for two errors.
*/
int lastErrorIndex;
/**
* Set of values recorded alongside {@link #lastErrorIndex}.
* NOTE(review): presumably the ATN state numbers at which an error was seen
* at {@code lastErrorIndex}, used by the same infinite-loop failsafe; confirm
* against the implementation file.
*/
misc::IntervalSet lastErrorStates;
///
Implements Jim Idle's magic sync mechanism in closures and optional * subrules. E.g.,
* ** a : sync ( stuff sync )* ; * sync : {consume to what can follow sync} ; ** * At the start of a sub rule upon error, {@link #sync} performs single * token deletion, if possible. If it can't do that, it bails on the current * rule and uses the default error recovery, which consumes until the * resynchronization set of the current rule. * *
If the sub rule is optional ({@code (...)?}, {@code (...)*}, or block * with an empty alternative), then the expected set includes what follows * the subrule.
* *During loop iteration, it consumes until it sees a token that can start a * sub rule or what follows loop. Yes, that is pretty aggressive. We opt to * stay in the loop as long as possible.
* *ORIGINS
* *Previous versions of ANTLR did a poor job of their recovery within loops. * A single mismatch token or missing token would force the parser to bail * out of the entire rules surrounding the loop. So, for rule
* ** classDef : 'class' ID '{' member* '}' ** * input with an extra token between members would force the parser to * consume until it found the next class definition rather than the next * member definition of the current class. * *
This functionality cost a little bit of effort because the parser has to * compare token set at the start of the loop and at each iteration. If for * some reason speed is suffering for you, you can turn off this * functionality by simply overriding this method as a blank { }.
*/ virtual void sync(Parser *recognizer) override; ///This method is called when {@link #singleTokenDeletion} identifies * single-token deletion as a viable recovery strategy for a mismatched * input error.
* *The default implementation simply returns if the handler is already in * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to * enter error recovery mode, followed by calling * {@link Parser#notifyErrorListeners}.
* * @param recognizer the parser instance */ virtual void reportUnwantedToken(Parser *recognizer); /** * This method is called to report a syntax error which requires the * insertion of a missing token into the input stream. At the time this * method is called, the missing token has not yet been inserted. When this * method returns, {@code recognizer} is in error recovery mode. * *This method is called when {@link #singleTokenInsertion} identifies * single-token insertion as a viable recovery strategy for a mismatched * input error.
* *The default implementation simply returns if the handler is already in * error recovery mode. Otherwise, it calls {@link #beginErrorCondition} to * enter error recovery mode, followed by calling * {@link Parser#notifyErrorListeners}.
* * @param recognizer the parser instance */ virtual void reportMissingToken(Parser *recognizer); public: /** * {@inheritDoc} * *The default implementation attempts to recover from the mismatched input * by using single token insertion and deletion as described below. If the * recovery attempt fails, this method throws an * {@link InputMismatchException}.
* *EXTRA TOKEN (single token deletion)
* *{@code LA(1)} is not what we are looking for. If {@code LA(2)} has the * right token, however, then assume {@code LA(1)} is some extra spurious * token and delete it. Then consume and return the next token (which was * the {@code LA(2)} token) as the successful result of the match operation.
* *This recovery strategy is implemented by {@link #singleTokenDeletion}.
* *MISSING TOKEN (single token insertion)
* *If current token (at {@code LA(1)}) is consistent with what could come * after the expected {@code LA(1)} token, then assume the token is missing * and use the parser's {@link TokenFactory} to create it on the fly. The * "insertion" is performed by returning the created token as the successful * result of the match operation.
* *This recovery strategy is implemented by {@link #singleTokenInsertion}.
* *EXAMPLE
* *For example, Input {@code i=(3;} is clearly missing the {@code ')'}. When * the parser returns from the nested call to {@code expr}, it will have * call chain:
* ** stat → expr → atom ** * and it will be trying to match the {@code ')'} at this point in the * derivation: * *
* => ID '=' '(' INT ')' ('+' atom)* ';' * ^ ** * The attempt to match {@code ')'} will fail when it sees {@code ';'} and * call {@link #recoverInline}. To recover, it sees that {@code LA(1)==';'} * is in the set of tokens that can follow the {@code ')'} token reference * in rule {@code atom}. It can assume that you forgot the {@code ')'}. */ virtual Token* recoverInline(Parser *recognizer) override; ///