/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ #pragma once #include #include "atn/ATNSimulator.h" #include "atn/LexerATNConfig.h" #include "atn/ATNConfigSet.h" namespace antlr4 { namespace atn { /// "dup" of ParserInterpreter class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator { protected: struct ANTLR4CPP_PUBLIC SimState final { size_t index = INVALID_INDEX; size_t line = 0; size_t charPos = INVALID_INDEX; dfa::DFAState *dfaState = nullptr; void reset(); }; public: static constexpr size_t MIN_DFA_EDGE = 0; static constexpr size_t MAX_DFA_EDGE = 127; // forces unicode to stay in ATN protected: /// /// When we hit an accept state in either the DFA or the ATN, we /// have to notify the character stream to start buffering characters /// via and record the current state. The current sim state /// includes the current index into the input, the current line, /// and current character position in that line. Note that the Lexer is /// tracking the starting line and characterization of the token. These /// variables track the "state" of the simulator when it hits an accept state. ///

/// We track these variables separately for the DFA and ATN simulation /// because the DFA simulation often has to fail over to the ATN /// simulation. If the ATN simulation fails, we need the DFA to fall /// back to its previously accepted state, if any. If the ATN succeeds, /// then the ATN does the accept and the DFA simulator that invoked it /// can simply return the predicted token type. ///

Lexer *const _recog; /// The current token's starting index into the character stream. /// Shared across DFA to ATN simulation in case the ATN fails and the /// DFA did not have a previous accept state. In this case, we use the /// ATN-generated exception object. size_t _startIndex; /// line number 1..n within the input. size_t _line; /// The index of the character relative to the beginning of the line 0..n-1. size_t _charPositionInLine; public: std::vector &_decisionToDFA; protected: size_t _mode; /// Used during DFA/ATN exec to record the most recent accept configuration info. SimState _prevAccept; public: LexerATNSimulator(const ATN &atn, std::vector &decisionToDFA, PredictionContextCache &sharedContextCache); LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector &decisionToDFA, PredictionContextCache &sharedContextCache); virtual ~LexerATNSimulator() = default; virtual void copyState(LexerATNSimulator *simulator); virtual size_t match(CharStream *input, size_t mode); virtual void reset() override; virtual void clearDFA() override; protected: virtual size_t matchATN(CharStream *input); virtual size_t execATN(CharStream *input, dfa::DFAState *ds0); /// /// Get an existing target state for an edge in the DFA. If the target state /// for the edge has not yet been computed or is otherwise not available, /// this method returns {@code null}. /// /// The current DFA state /// The next input symbol /// The existing target DFA state for the given input symbol /// {@code t}, or {@code null} if the target state for this edge is not /// already cached virtual dfa::DFAState *getExistingTargetState(dfa::DFAState *s, size_t t); /// /// Compute a target state for an edge in the DFA, and attempt to add the /// computed state and corresponding edge to the DFA. /// /// The input stream /// The current DFA state /// The next input symbol /// /// The computed target DFA state for the given input symbol /// {@code t}. If {@code t} does not lead to a valid DFA state, this method /// returns . virtual dfa::DFAState *computeTargetState(CharStream *input, dfa::DFAState *s, size_t t); virtual size_t failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t); /// /// Given a starting configuration set, figure out all ATN configurations /// we can reach upon input {@code t}. Parameter {@code reach} is a return /// parameter. /// void getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, // closure_ as we have a closure() already ATNConfigSet *reach, size_t t); virtual void accept(CharStream *input, const Ref &lexerActionExecutor, size_t startIndex, size_t index, size_t line, size_t charPos); virtual ATNState *getReachableTarget(const Transition *trans, size_t t); virtual std::unique_ptr computeStartState(CharStream *input, ATNState *p); /// /// Since the alternatives within any lexer decision are ordered by /// preference, this method stops pursuing the closure as soon as an accept /// state is reached. After the first accept state is reached by depth-first /// search from {@code config}, all other (potentially reachable) states for /// this rule would have a lower priority. /// /// {@code true} if an accept state is reached, otherwise /// {@code false}. virtual bool closure(CharStream *input, const Ref &config, ATNConfigSet *configs, bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon); // side-effect: can alter configs.hasSemanticContext virtual Ref getEpsilonTarget(CharStream *input, const Ref &config, const Transition *t, ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon); /// /// Evaluate a predicate specified in the lexer. ///

/// If {@code speculative} is {@code true}, this method was called before /// for the matched character. This method should call /// before evaluating the predicate to ensure position /// sensitive values, including , , /// and , properly reflect the current /// lexer state. This method should restore {@code input} and the simulator /// to the original state before returning (i.e. undo the actions made by the /// call to . ///

/// The input stream. /// The rule containing the predicate. /// The index of the predicate within the rule. /// {@code true} if the current index in {@code input} is /// one character before the predicate's location. /// /// {@code true} if the specified predicate evaluates to /// {@code true}. virtual bool evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative); virtual void captureSimState(CharStream *input, dfa::DFAState *dfaState); virtual dfa::DFAState* addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q); virtual void addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q); /// /// Add a new DFA state if there isn't one with this set of /// configurations already. This method also detects the first /// configuration containing an ATN rule stop state. Later, when /// traversing the DFA, we will know which rule to accept. /// virtual dfa::DFAState *addDFAState(ATNConfigSet *configs); virtual dfa::DFAState *addDFAState(ATNConfigSet *configs, bool suppressEdge); public: dfa::DFA& getDFA(size_t mode); /// Get the text matched so far for the current token. virtual std::string getText(CharStream *input); virtual size_t getLine() const; virtual void setLine(size_t line); virtual size_t getCharPositionInLine(); virtual void setCharPositionInLine(size_t charPositionInLine); virtual void consume(CharStream *input); virtual std::string getTokenName(size_t t); private: void InitializeInstanceFields(); }; } // namespace atn } // namespace antlr4