Sha256: 53932e2e251dc46d10f409d4426c45625313ae7e8a0513c0c8de4cfb7a9d7030
Contents?: true
Size: 1008 Bytes
Versions: 11
Compression:
Stored size: 1008 Bytes
Contents
#ifndef CPPJIEBA_SEGMENTBASE_H #define CPPJIEBA_SEGMENTBASE_H #include "limonp/Logging.hpp" #include "PreFilter.hpp" #include <cassert> namespace cppjieba { const char* const SPECIAL_SEPARATORS = " \t\n\xEF\xBC\x8C\xE3\x80\x82"; using namespace limonp; class SegmentBase { public: SegmentBase() { XCHECK(ResetSeparators(SPECIAL_SEPARATORS)); } virtual ~SegmentBase() { } virtual void Cut(const string& sentence, vector<string>& words) const = 0; bool ResetSeparators(const string& s) { symbols_.clear(); RuneStrArray runes; if (!DecodeRunesInString(s, runes)) { XLOG(ERROR) << "decode " << s << " failed"; return false; } for (size_t i = 0; i < runes.size(); i++) { if (!symbols_.insert(runes[i].rune).second) { XLOG(ERROR) << s.substr(runes[i].offset, runes[i].len) << " already exists"; return false; } } return true; } protected: unordered_set<Rune> symbols_; }; // class SegmentBase } // cppjieba #endif
Version data entries
11 entries across 11 versions & 2 rubygems