/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ #pragma once #include "CharStream.h" namespace antlr4 { /// Do not buffer up the entire char stream. It does keep a small buffer /// for efficiency and also buffers while a mark exists (set by the /// lookahead prediction in parser). "Unbuffered" here refers to fact /// that it doesn't buffer all data, not that's it's on demand loading of char. class ANTLR4CPP_PUBLIC UnbufferedCharStream : public CharStream { public: /// The name or source of this char stream. std::string name; UnbufferedCharStream(std::wistream &input); virtual void consume() override; virtual size_t LA(ssize_t i) override; /// /// Return a marker that we can release later. ///

/// The specific marker value used for this class allows for some level of /// protection against misuse where {@code seek()} is called on a mark or /// {@code release()} is called in the wrong order. ///

virtual ssize_t mark() override; /// /// Decrement number of markers, resetting buffer if we hit 0. /// virtual void release(ssize_t marker) override; virtual size_t index() override; /// /// Seek to absolute character index, which might not be in the current /// sliding window. Move {@code p} to {@code index-bufferStartIndex}. /// virtual void seek(size_t index) override; virtual size_t size() override; virtual std::string getSourceName() const override; virtual std::string getText(const misc::Interval &interval) override; protected: /// A moving window buffer of the data being scanned. While there's a marker, /// we keep adding to buffer. Otherwise, resets so /// we start filling at index 0 again. // UTF-32 encoded. #if defined(_MSC_VER) && _MSC_VER == 1900 i32string _data; // Custom type for VS 2015. typedef __int32 storage_type; #else std::u32string _data; typedef char32_t storage_type; #endif /// /// 0..n-1 index into of next character. ///

/// The {@code LA(1)} character is {@code data[p]}. If {@code p == n}, we are /// out of buffered characters. ///

size_t _p; /// /// Count up with and down with /// . When we {@code release()} the last mark, /// {@code numMarkers} reaches 0 and we reset the buffer. Copy /// {@code data[p]..data[n-1]} to {@code data[0]..data[(n-1)-p]}. /// size_t _numMarkers; /// This is the {@code LA(-1)} character for the current position. size_t _lastChar; // UTF-32 /// /// When {@code numMarkers > 0}, this is the {@code LA(-1)} character for the /// first character in . Otherwise, this is unspecified. /// size_t _lastCharBufferStart; // UTF-32 /// /// Absolute character index. It's the index of the character about to be /// read via {@code LA(1)}. Goes from 0 to the number of characters in the /// entire stream, although the stream size is unknown before the end is /// reached. /// size_t _currentCharIndex; std::wistream &_input; /// /// Make sure we have 'want' elements from current position . /// Last valid {@code p} index is {@code data.length-1}. {@code p+need-1} is /// the char index 'need' elements ahead. If we need 1 element, /// {@code (p+1-1)==p} must be less than {@code data.length}. /// virtual void sync(size_t want); /// /// Add {@code n} characters to the buffer. Returns the number of characters /// actually added to the buffer. If the return value is less than {@code n}, /// then EOF was reached before {@code n} characters could be added. /// virtual size_t fill(size_t n); /// Override to provide different source of characters than /// . virtual char32_t nextChar(); virtual void add(char32_t c); size_t getBufferStartIndex() const; private: void InitializeInstanceFields(); }; } // namespace antlr4