/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ #include "misc/MurmurHash.h" #include "Lexer.h" #include "Exceptions.h" #include "Vocabulary.h" #include "misc/IntervalSet.h" using namespace antlr4; using namespace antlr4::misc; IntervalSet const IntervalSet::COMPLETE_CHAR_SET = IntervalSet::of(Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE); IntervalSet const IntervalSet::EMPTY_SET; IntervalSet::IntervalSet() : _intervals() { } IntervalSet::IntervalSet(const IntervalSet &set) : IntervalSet() { _intervals = set._intervals; } IntervalSet::IntervalSet(IntervalSet&& set) : IntervalSet(std::move(set._intervals)) { } IntervalSet::IntervalSet(std::vector&& intervals) : _intervals(std::move(intervals)) { } IntervalSet& IntervalSet::operator=(const IntervalSet& other) { _intervals = other._intervals; return *this; } IntervalSet& IntervalSet::operator=(IntervalSet&& other) { _intervals = move(other._intervals); return *this; } IntervalSet IntervalSet::of(ssize_t a) { return IntervalSet({ Interval(a, a) }); } IntervalSet IntervalSet::of(ssize_t a, ssize_t b) { return IntervalSet({ Interval(a, b) }); } void IntervalSet::clear() { _intervals.clear(); } void IntervalSet::add(ssize_t el) { add(el, el); } void IntervalSet::add(ssize_t a, ssize_t b) { add(Interval(a, b)); } void IntervalSet::add(const Interval &addition) { if (addition.b < addition.a) { return; } // find position in list for (auto iterator = _intervals.begin(); iterator != _intervals.end(); ++iterator) { Interval r = *iterator; if (addition == r) { return; } if (addition.adjacent(r) || !addition.disjoint(r)) { // next to each other, make a single larger interval Interval bigger = addition.Union(r); *iterator = bigger; // make sure we didn't just create an interval that // should be merged with next interval in list while (iterator + 1 != _intervals.end()) { Interval next = *++iterator; if (!bigger.adjacent(next) && bigger.disjoint(next)) { break; } // if we bump up against or overlap next, merge iterator = _intervals.erase(iterator);// remove this one --iterator; // move backwards to what we just set *iterator = bigger.Union(next); // set to 3 merged ones // ml: no need to advance iterator, we do that in the next round anyway. ++iterator; // first call to next after previous duplicates the result } return; } if (addition.startsBeforeDisjoint(r)) { // insert before r //--iterator; _intervals.insert(iterator, addition); return; } // if disjoint and after r, a future iteration will handle it } // ok, must be after last interval (and disjoint from last interval) // just add it _intervals.push_back(addition); } IntervalSet IntervalSet::Or(const std::vector &sets) { IntervalSet result; for (const auto &s : sets) { result.addAll(s); } return result; } IntervalSet& IntervalSet::addAll(const IntervalSet &set) { // walk set and add each interval for (auto const& interval : set._intervals) { add(interval); } return *this; } IntervalSet IntervalSet::complement(ssize_t minElement, ssize_t maxElement) const { return complement(IntervalSet::of(minElement, maxElement)); } IntervalSet IntervalSet::complement(const IntervalSet &vocabulary) const { return vocabulary.subtract(*this); } IntervalSet IntervalSet::subtract(const IntervalSet &other) const { return subtract(*this, other); } IntervalSet IntervalSet::subtract(const IntervalSet &left, const IntervalSet &right) { if (left.isEmpty()) { return IntervalSet(); } if (right.isEmpty()) { // right set has no elements; just return the copy of the current set return left; } IntervalSet result(left); size_t resultI = 0; size_t rightI = 0; while (resultI < result._intervals.size() && rightI < right._intervals.size()) { Interval &resultInterval = result._intervals[resultI]; const Interval &rightInterval = right._intervals[rightI]; // operation: (resultInterval - rightInterval) and update indexes if (rightInterval.b < resultInterval.a) { rightI++; continue; } if (rightInterval.a > resultInterval.b) { resultI++; continue; } Interval beforeCurrent; Interval afterCurrent; if (rightInterval.a > resultInterval.a) { beforeCurrent = Interval(resultInterval.a, rightInterval.a - 1); } if (rightInterval.b < resultInterval.b) { afterCurrent = Interval(rightInterval.b + 1, resultInterval.b); } if (beforeCurrent.a > -1) { // -1 is the default value if (afterCurrent.a > -1) { // split the current interval into two result._intervals[resultI] = beforeCurrent; result._intervals.insert(result._intervals.begin() + resultI + 1, afterCurrent); resultI++; rightI++; } else { // replace the current interval result._intervals[resultI] = beforeCurrent; resultI++; } } else { if (afterCurrent.a > -1) { // replace the current interval result._intervals[resultI] = afterCurrent; rightI++; } else { // remove the current interval (thus no need to increment resultI) result._intervals.erase(result._intervals.begin() + resultI); } } } // If rightI reached right.intervals.size(), no more intervals to subtract from result. // If resultI reached result.intervals.size(), we would be subtracting from an empty set. // Either way, we are done. return result; } IntervalSet IntervalSet::Or(const IntervalSet &a) const { IntervalSet result; result.addAll(*this); result.addAll(a); return result; } IntervalSet IntervalSet::And(const IntervalSet &other) const { IntervalSet intersection; size_t i = 0; size_t j = 0; // iterate down both interval lists looking for nondisjoint intervals while (i < _intervals.size() && j < other._intervals.size()) { Interval mine = _intervals[i]; Interval theirs = other._intervals[j]; if (mine.startsBeforeDisjoint(theirs)) { // move this iterator looking for interval that might overlap i++; } else if (theirs.startsBeforeDisjoint(mine)) { // move other iterator looking for interval that might overlap j++; } else if (mine.properlyContains(theirs)) { // overlap, add intersection, get next theirs intersection.add(mine.intersection(theirs)); j++; } else if (theirs.properlyContains(mine)) { // overlap, add intersection, get next mine intersection.add(mine.intersection(theirs)); i++; } else if (!mine.disjoint(theirs)) { // overlap, add intersection intersection.add(mine.intersection(theirs)); // Move the iterator of lower range [a..b], but not // the upper range as it may contain elements that will collide // with the next iterator. So, if mine=[0..115] and // theirs=[115..200], then intersection is 115 and move mine // but not theirs as theirs may collide with the next range // in thisIter. // move both iterators to next ranges if (mine.startsAfterNonDisjoint(theirs)) { j++; } else if (theirs.startsAfterNonDisjoint(mine)) { i++; } } } return intersection; } bool IntervalSet::contains(size_t el) const { return contains(symbolToNumeric(el)); } bool IntervalSet::contains(ssize_t el) const { if (_intervals.empty()) return false; if (el < _intervals[0].a) // list is sorted and el is before first interval; not here return false; for (const auto &interval : _intervals) { if (el >= interval.a && el <= interval.b) { return true; // found in this interval } } return false; } bool IntervalSet::isEmpty() const { return _intervals.empty(); } ssize_t IntervalSet::getSingleElement() const { if (_intervals.size() == 1) { if (_intervals[0].a == _intervals[0].b) { return _intervals[0].a; } } return Token::INVALID_TYPE; // XXX: this value is 0, but 0 is a valid interval range, how can that work? } ssize_t IntervalSet::getMaxElement() const { if (_intervals.empty()) { return Token::INVALID_TYPE; } return _intervals.back().b; } ssize_t IntervalSet::getMinElement() const { if (_intervals.empty()) { return Token::INVALID_TYPE; } return _intervals[0].a; } std::vector const& IntervalSet::getIntervals() const { return _intervals; } size_t IntervalSet::hashCode() const { size_t hash = MurmurHash::initialize(); for (const auto &interval : _intervals) { hash = MurmurHash::update(hash, interval.a); hash = MurmurHash::update(hash, interval.b); } return MurmurHash::finish(hash, _intervals.size() * 2); } bool IntervalSet::operator == (const IntervalSet &other) const { if (_intervals.empty() && other._intervals.empty()) return true; if (_intervals.size() != other._intervals.size()) return false; return std::equal(_intervals.begin(), _intervals.end(), other._intervals.begin()); } std::string IntervalSet::toString() const { return toString(false); } std::string IntervalSet::toString(bool elemAreChar) const { if (_intervals.empty()) { return "{}"; } std::stringstream ss; size_t effectiveSize = size(); if (effectiveSize > 1) { ss << "{"; } bool firstEntry = true; for (const auto &interval : _intervals) { if (!firstEntry) ss << ", "; firstEntry = false; ssize_t a = interval.a; ssize_t b = interval.b; if (a == b) { if (a == -1) { ss << ""; } else if (elemAreChar) { ss << "'" << static_cast(a) << "'"; } else { ss << a; } } else { if (elemAreChar) { ss << "'" << static_cast(a) << "'..'" << static_cast(b) << "'"; } else { ss << a << ".." << b; } } } if (effectiveSize > 1) { ss << "}"; } return ss.str(); } std::string IntervalSet::toString(const std::vector &tokenNames) const { return toString(dfa::Vocabulary::fromTokenNames(tokenNames)); } std::string IntervalSet::toString(const dfa::Vocabulary &vocabulary) const { if (_intervals.empty()) { return "{}"; } std::stringstream ss; size_t effectiveSize = size(); if (effectiveSize > 1) { ss << "{"; } bool firstEntry = true; for (const auto &interval : _intervals) { if (!firstEntry) ss << ", "; firstEntry = false; ssize_t a = interval.a; ssize_t b = interval.b; if (a == b) { ss << elementName(vocabulary, a); } else { for (ssize_t i = a; i <= b; i++) { if (i > a) { ss << ", "; } ss << elementName(vocabulary, i); } } } if (effectiveSize > 1) { ss << "}"; } return ss.str(); } std::string IntervalSet::elementName(const std::vector &tokenNames, ssize_t a) const { return elementName(dfa::Vocabulary::fromTokenNames(tokenNames), a); } std::string IntervalSet::elementName(const dfa::Vocabulary &vocabulary, ssize_t a) const { if (a == -1) { return ""; } else if (a == -2) { return ""; } else { return vocabulary.getDisplayName(a); } } size_t IntervalSet::size() const { size_t result = 0; for (const auto &interval : _intervals) { result += size_t(interval.b - interval.a + 1); } return result; } std::vector IntervalSet::toList() const { std::vector result; for (const auto &interval : _intervals) { ssize_t a = interval.a; ssize_t b = interval.b; for (ssize_t v = a; v <= b; v++) { result.push_back(v); } } return result; } std::set IntervalSet::toSet() const { std::set result; for (const auto &interval : _intervals) { ssize_t a = interval.a; ssize_t b = interval.b; for (ssize_t v = a; v <= b; v++) { result.insert(v); } } return result; } ssize_t IntervalSet::get(size_t i) const { size_t index = 0; for (const auto &interval : _intervals) { ssize_t a = interval.a; ssize_t b = interval.b; for (ssize_t v = a; v <= b; v++) { if (index == i) { return v; } index++; } } return -1; } void IntervalSet::remove(size_t el) { remove(symbolToNumeric(el)); } void IntervalSet::remove(ssize_t el) { for (size_t i = 0; i < _intervals.size(); ++i) { Interval &interval = _intervals[i]; ssize_t a = interval.a; ssize_t b = interval.b; if (el < a) { break; // list is sorted and el is before this interval; not here } // if whole interval x..x, rm if (el == a && el == b) { _intervals.erase(_intervals.begin() + (long)i); break; } // if on left edge x..b, adjust left if (el == a) { interval.a++; break; } // if on right edge a..x, adjust right if (el == b) { interval.b--; break; } // if in middle a..x..b, split interval if (el > a && el < b) { // found in this interval ssize_t oldb = interval.b; interval.b = el - 1; // [a..x-1] add(el + 1, oldb); // add [x+1..b] break; // ml: not in the Java code but I believe we also should stop searching here, as we found x. } } }