#pragma once #include "LDA.h" namespace tomoto { template struct DocumentSLDA : public DocumentLDA<_tw> { using BaseDocument = DocumentLDA<_tw>; using DocumentLDA<_tw>::DocumentLDA; std::vector y; DEFINE_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseDocument, 0, y); DEFINE_TAGGED_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseDocument, 1, 0x00010001, y); }; class ISLDAModel : public ILDAModel { public: enum class GLM { linear = 0, binary_logistic = 1, }; using DefaultDocType = DocumentSLDA; static ISLDAModel* create(TermWeight _weight, size_t _K = 1, const std::vector& vars = {}, Float alpha = 0.1, Float _eta = 0.01, const std::vector& _mu = {}, const std::vector& _nuSq = {}, const std::vector& _glmParam = {}, size_t seed = std::random_device{}(), bool scalarRng = false); virtual size_t addDoc(const std::vector& words, const std::vector& y) = 0; virtual std::unique_ptr makeDoc(const std::vector& words, const std::vector& y) const = 0; virtual size_t addDoc(const std::string& rawStr, const RawDocTokenizer::Factory& tokenizer, const std::vector& y) = 0; virtual std::unique_ptr makeDoc(const std::string& rawStr, const RawDocTokenizer::Factory& tokenizer, const std::vector& y) const = 0; virtual size_t addDoc(const std::string& rawStr, const std::vector& words, const std::vector& pos, const std::vector& len, const std::vector& y) = 0; virtual std::unique_ptr makeDoc(const std::string& rawStr, const std::vector& words, const std::vector& pos, const std::vector& len, const std::vector& y) const = 0; virtual size_t getF() const = 0; virtual std::vector getRegressionCoef(size_t f) const = 0; virtual GLM getTypeOfVar(size_t f) const = 0; virtual std::vector estimateVars(const DocumentBase* doc) const = 0; }; }