/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
#pragma once
#include "CharStream.h"
namespace antlr4 {
/// Do not buffer up the entire char stream. It does keep a small buffer
/// for efficiency and also buffers while a mark exists (set by the
/// lookahead prediction in parser). "Unbuffered" here refers to fact
/// that it doesn't buffer all data, not that's it's on demand loading of char.
class ANTLR4CPP_PUBLIC UnbufferedCharStream : public CharStream {
public:
/// The name or source of this char stream.
std::string name;
UnbufferedCharStream(std::wistream &input);
virtual void consume() override;
virtual size_t LA(ssize_t i) override;
///
/// Return a marker that we can release later.
///
/// The specific marker value used for this class allows for some level of
/// protection against misuse where {@code seek()} is called on a mark or
/// {@code release()} is called in the wrong order.
///
virtual ssize_t mark() override;
///
/// Decrement number of markers, resetting buffer if we hit 0.
///
virtual void release(ssize_t marker) override;
virtual size_t index() override;
///
/// Seek to absolute character index, which might not be in the current
/// sliding window. Move {@code p} to {@code index-bufferStartIndex}.
///
virtual void seek(size_t index) override;
virtual size_t size() override;
virtual std::string getSourceName() const override;
virtual std::string getText(const misc::Interval &interval) override;
protected:
/// A moving window buffer of the data being scanned. While there's a marker,
/// we keep adding to buffer. Otherwise, resets so
/// we start filling at index 0 again.
// UTF-32 encoded.
#if defined(_MSC_VER) && _MSC_VER == 1900
i32string _data; // Custom type for VS 2015.
typedef __int32 storage_type;
#else
std::u32string _data;
typedef char32_t storage_type;
#endif
///
/// 0..n-1 index into of next character.
///
/// The {@code LA(1)} character is {@code data[p]}. If {@code p == n}, we are
/// out of buffered characters.
///
size_t _p;
///
/// Count up with and down with
/// . When we {@code release()} the last mark,
/// {@code numMarkers} reaches 0 and we reset the buffer. Copy
/// {@code data[p]..data[n-1]} to {@code data[0]..data[(n-1)-p]}.
///
size_t _numMarkers;
/// This is the {@code LA(-1)} character for the current position.
size_t _lastChar; // UTF-32
///
/// When {@code numMarkers > 0}, this is the {@code LA(-1)} character for the
/// first character in . Otherwise, this is unspecified.
///
size_t _lastCharBufferStart; // UTF-32
///
/// Absolute character index. It's the index of the character about to be
/// read via {@code LA(1)}. Goes from 0 to the number of characters in the
/// entire stream, although the stream size is unknown before the end is
/// reached.
///
size_t _currentCharIndex;
std::wistream &_input;
///
/// Make sure we have 'want' elements from current position .
/// Last valid {@code p} index is {@code data.length-1}. {@code p+need-1} is
/// the char index 'need' elements ahead. If we need 1 element,
/// {@code (p+1-1)==p} must be less than {@code data.length}.
///
virtual void sync(size_t want);
///
/// Add {@code n} characters to the buffer. Returns the number of characters
/// actually added to the buffer. If the return value is less than {@code n},
/// then EOF was reached before {@code n} characters could be added.
///
virtual size_t fill(size_t n);
/// Override to provide different source of characters than
/// .
virtual char32_t nextChar();
virtual void add(char32_t c);
size_t getBufferStartIndex() const;
private:
void InitializeInstanceFields();
};
} // namespace antlr4