vendor/tomotopy/src/TopicModel/SLDAModel.hpp: tomoto-0.1.4 vs tomoto-0.2.0
- old
+ new
@@ -14,26 +14,28 @@
namespace detail
{
template<typename _WeightType>
struct GLMFunctor
{
- Eigen::Matrix<Float, -1, 1> regressionCoef; // Dim : (K)
+ Vector regressionCoef; // Dim : (K)
- GLMFunctor(size_t K = 0, Float mu = 0) : regressionCoef(Eigen::Matrix<Float, -1, 1>::Constant(K, mu))
+ GLMFunctor(size_t K = 0, Float mu = 0) : regressionCoef(Vector::Constant(K, mu))
{
}
virtual ISLDAModel::GLM getType() const = 0;
+ virtual std::unique_ptr<GLMFunctor> copy() const = 0;
+
virtual void updateZLL(
- Eigen::Matrix<Float, -1, 1>& zLikelihood,
+ Vector& zLikelihood,
Float y, const Eigen::Matrix<_WeightType, -1, 1>& numByTopic, size_t docId, Float docSize) const = 0;
virtual void optimizeCoef(
- const Eigen::Matrix<Float, -1, -1>& normZ,
+ const Matrix& normZ,
Float mu, Float nuSq,
- Eigen::Block<Eigen::Matrix<Float, -1, -1>, -1, 1, true> ys
+ Eigen::Block<Matrix, -1, 1, true> ys
) = 0;
virtual double getLL(Float y, const Eigen::Matrix<_WeightType, -1, 1>& numByTopic,
Float docSize) const = 0;
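
The base functor gains a pure virtual copy() alongside the switch to the shorter Vector/Matrix aliases, so the owning model can deep-copy a heterogeneous set of functors through the base pointer. A minimal, self-contained sketch of that clone pattern (all names below are illustrative, not from tomotopy):

    #include <memory>
    #include <vector>

    struct Base
    {
        virtual ~Base() = default;
        // Each concrete type returns a fresh copy of itself through the base pointer.
        virtual std::unique_ptr<Base> copy() const = 0;
    };

    struct Derived : Base
    {
        int value = 42;
        std::unique_ptr<Base> copy() const override
        {
            return std::make_unique<Derived>(*this); // implicit copy constructor
        }
    };

    int main()
    {
        std::vector<std::unique_ptr<Base>> owners;
        owners.emplace_back(std::make_unique<Derived>());

        std::vector<std::unique_ptr<Base>> clones;
        for (const auto& p : owners) clones.emplace_back(p->copy()); // deep copy
    }
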
@@ -67,30 +69,35 @@
{
}
ISLDAModel::GLM getType() const override { return ISLDAModel::GLM::linear; }
+ std::unique_ptr<GLMFunctor<_WeightType>> copy() const override
+ {
+ return std::make_unique<LinearFunctor>(*this);
+ }
+
void updateZLL(
- Eigen::Matrix<Float, -1, 1>& zLikelihood,
+ Vector& zLikelihood,
Float y, const Eigen::Matrix<_WeightType, -1, 1>& numByTopic, size_t docId, Float docSize) const override
{
Float yErr = y -
(this->regressionCoef.array() * numByTopic.array().template cast<Float>()).sum()
/ docSize;
zLikelihood.array() *= (this->regressionCoef.array() / docSize / 2 / sigmaSq *
(2 * yErr - this->regressionCoef.array() / docSize)).exp();
}
void optimizeCoef(
- const Eigen::Matrix<Float, -1, -1>& normZ,
+ const Matrix& normZ,
Float mu, Float nuSq,
- Eigen::Block<Eigen::Matrix<Float, -1, -1>, -1, 1, true> ys
+ Eigen::Block<Matrix, -1, 1, true> ys
) override
{
- Eigen::Matrix<Float, -1, -1> selectedNormZ = normZ.array().rowwise() * (!ys.array().transpose().isNaN()).template cast<Float>();
- Eigen::Matrix<Float, -1, -1> normZZT = selectedNormZ * selectedNormZ.transpose();
- normZZT += Eigen::Matrix<Float, -1, -1>::Identity(normZZT.cols(), normZZT.cols()) / nuSq;
+ Matrix selectedNormZ = normZ.array().rowwise() * (!ys.array().transpose().isNaN()).template cast<Float>();
+ Matrix normZZT = selectedNormZ * selectedNormZ.transpose();
+ normZZT += Matrix::Identity(normZZT.cols(), normZZT.cols()) / nuSq;
this->regressionCoef = normZZT.colPivHouseholderQr().solve(selectedNormZ * ys.array().isNaN().select(0, ys).matrix());
}
double getLL(Float y, const Eigen::Matrix<_WeightType, -1, 1>& numByTopic,
Float docSize) const override
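
LinearFunctor::optimizeCoef solves a ridge-regularized normal-equation system, (Z Z^T + I / nuSq) coef = Z y, with a column-pivoting Householder QR, masking out columns whose response is NaN. A reduced Eigen sketch of the same solve (masking omitted; dimensions and data are made up for illustration):

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
        using Matrix = Eigen::MatrixXf;
        using Vector = Eigen::VectorXf;

        const int K = 4;        // topics
        const int D = 10;       // documents
        const float nuSq = 1.f; // prior variance of the coefficients

        Matrix normZ = Matrix::Random(K, D); // per-document topic proportions
        Vector ys = Vector::Random(D);       // observed responses

        // Ridge-regularized normal equations: (Z Z^T + I / nuSq) coef = Z y
        Matrix A = normZ * normZ.transpose() + Matrix::Identity(K, K) / nuSq;
        Vector coef = A.colPivHouseholderQr().solve(normZ * ys);

        std::cout << coef.transpose() << std::endl;
    }
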
@@ -111,43 +118,48 @@
template<typename _WeightType>
struct BinaryLogisticFunctor : public GLMFunctor<_WeightType>
{
Float b = 1;
- Eigen::Matrix<Float, -1, 1> omega;
+ Vector omega;
BinaryLogisticFunctor(size_t K = 0, Float mu = 0, Float _b = 1, size_t numDocs = 0)
- : GLMFunctor<_WeightType>(K, mu), b(_b), omega{ Eigen::Matrix<Float, -1, 1>::Ones(numDocs) }
+ : GLMFunctor<_WeightType>(K, mu), b(_b), omega{ Vector::Ones(numDocs) }
{
}
ISLDAModel::GLM getType() const override { return ISLDAModel::GLM::binary_logistic; }
+ std::unique_ptr<GLMFunctor<_WeightType>> copy() const override
+ {
+ return std::make_unique<BinaryLogisticFunctor>(*this);
+ }
+
void updateZLL(
- Eigen::Matrix<Float, -1, 1>& zLikelihood,
+ Vector& zLikelihood,
Float y, const Eigen::Matrix<_WeightType, -1, 1>& numByTopic, size_t docId, Float docSize) const override
{
Float yErr = b * (y - 0.5f) -
(this->regressionCoef.array() * numByTopic.array().template cast<Float>()).sum()
/ docSize * omega[docId];
zLikelihood.array() *= (this->regressionCoef.array() / docSize *
(yErr - omega[docId] / 2 * this->regressionCoef.array() / docSize)).exp();
}
void optimizeCoef(
- const Eigen::Matrix<Float, -1, -1>& normZ,
+ const Matrix& normZ,
Float mu, Float nuSq,
- Eigen::Block<Eigen::Matrix<Float, -1, -1>, -1, 1, true> ys
+ Eigen::Block<Matrix, -1, 1, true> ys
) override
{
- Eigen::Matrix<Float, -1, -1> selectedNormZ = normZ.array().rowwise() * (!ys.array().transpose().isNaN()).template cast<Float>();
- Eigen::Matrix<Float, -1, -1> normZZT = selectedNormZ * Eigen::DiagonalMatrix<Float, -1>{ omega } * selectedNormZ.transpose();
- normZZT += Eigen::Matrix<Float, -1, -1>::Identity(normZZT.cols(), normZZT.cols()) / nuSq;
+ Matrix selectedNormZ = normZ.array().rowwise() * (!ys.array().transpose().isNaN()).template cast<Float>();
+ Matrix normZZT = selectedNormZ * Eigen::DiagonalMatrix<Float, -1>{ omega } * selectedNormZ.transpose();
+ normZZT += Matrix::Identity(normZZT.cols(), normZZT.cols()) / nuSq;
this->regressionCoef = normZZT
.colPivHouseholderQr().solve(selectedNormZ * ys.array().isNaN().select(0, b * (ys.array() - 0.5f)).matrix()
- + Eigen::Matrix<Float, -1, 1>::Constant(selectedNormZ.rows(), mu / nuSq));
+ + Vector::Constant(selectedNormZ.rows(), mu / nuSq));
RandGen rng;
for (size_t i = 0; i < (size_t)omega.size(); ++i)
{
if (std::isnan(ys[i])) continue;
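
The binary-logistic variant solves the analogous system with a per-document diagonal weight omega and a prior-mean term mu / nuSq on the right-hand side; the loop above then resamples omega. A reduced Eigen sketch of the weighted solve (omega fixed to ones, NaN masking omitted, data made up):

    #include <Eigen/Dense>

    int main()
    {
        using Matrix = Eigen::MatrixXf;
        using Vector = Eigen::VectorXf;

        const int K = 4, D = 10;
        const float b = 1.f, mu = 0.f, nuSq = 1.f;

        Matrix normZ = Matrix::Random(K, D);
        Vector ys = (Vector::Random(D).array() > 0.f).cast<float>().matrix(); // 0/1 labels
        Vector omega = Vector::Ones(D); // per-document auxiliary weights

        // Weighted ridge system:
        // (Z diag(omega) Z^T + I / nuSq) coef = Z * (b * (y - 0.5)) + mu / nuSq
        Matrix A = normZ * omega.asDiagonal() * normZ.transpose()
                 + Matrix::Identity(K, K) / nuSq;
        Vector rhs = normZ * (b * (ys.array() - 0.5f)).matrix()
                   + Vector::Constant(K, mu / nuSq);
        Vector coef = A.colPivHouseholderQr().solve(rhs);
        (void)coef;
    }
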
@@ -171,12 +183,24 @@
return 1 / (1 + exp(-z));
}
DEFINE_SERIALIZER_AFTER_BASE(GLMFunctor<_WeightType>, b, omega);
};
+
+ struct CopyGLMFunctor
+ {
+ template<typename Wt>
+ std::vector<std::unique_ptr<GLMFunctor<Wt>>> operator()(const std::vector<std::unique_ptr<GLMFunctor<Wt>>>& o)
+ {
+ std::vector<std::unique_ptr<GLMFunctor<Wt>>> ret;
+ for (auto& p : o) ret.emplace_back(p->copy());
+ return ret;
+ }
+ };
}
+
template<TermWeight _tw, typename _RandGen,
size_t _Flags = flags::partitioned_multisampling,
typename _Interface = ISLDAModel,
typename _Derived = void,
typename _DocType = DocumentSLDA<_tw>,
@@ -196,16 +220,16 @@
uint64_t F; // number of response variables
std::vector<ISLDAModel::GLM> varTypes;
std::vector<Float> glmParam;
- Eigen::Matrix<Float, -1, 1> mu; // Mean of regression coefficients, Dim : (F)
- Eigen::Matrix<Float, -1, 1> nuSq; // Variance of regression coefficients, Dim : (F)
+ Vector mu; // Mean of regression coefficients, Dim : (F)
+ Vector nuSq; // Variance of regression coefficients, Dim : (F)
- std::vector<std::unique_ptr<detail::GLMFunctor<WeightType>>> responseVars;
- Eigen::Matrix<Float, -1, -1> normZ; // topic proportions for all docs, Dim : (K, D)
- Eigen::Matrix<Float, -1, -1> Ys; // response variables, Dim : (D, F)
+ DelegateCopy<std::vector<std::unique_ptr<detail::GLMFunctor<WeightType>>>, detail::CopyGLMFunctor> responseVars;
+ Matrix normZ; // topic proportions for all docs, Dim : (K, D)
+ Matrix Ys; // response variables, Dim : (D, F)
template<bool _asymEta>
Float* getZLikelihoods(_ModelState& ld, const _DocType& doc, size_t docId, size_t vid) const
{
const size_t V = this->realV;
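
responseVars holds owning pointers, so a plain member-wise copy of the model would not compile; the new CopyGLMFunctor clones each element via copy(), and DelegateCopy presumably routes the member's copy construction through that policy (an assumption about its behavior, not a description of its implementation). A sketch of the element-wise clone with stand-in names:

    #include <memory>
    #include <vector>

    // Illustrative stand-ins for GLMFunctor and CopyGLMFunctor.
    struct Functor
    {
        float coef = 0.f;
        virtual ~Functor() = default;
        virtual std::unique_ptr<Functor> copy() const
        {
            return std::make_unique<Functor>(*this);
        }
    };

    struct CopyFunctors
    {
        std::vector<std::unique_ptr<Functor>> operator()(const std::vector<std::unique_ptr<Functor>>& o) const
        {
            std::vector<std::unique_ptr<Functor>> ret;
            for (const auto& p : o) ret.emplace_back(p->copy()); // element-wise clone
            return ret;
        }
    };

    int main()
    {
        std::vector<std::unique_ptr<Functor>> vars;
        vars.emplace_back(std::make_unique<Functor>());

        // A direct copy of `vars` would not compile (unique_ptr is move-only),
        // so copying goes through the policy object instead.
        std::vector<std::unique_ptr<Functor>> copied = CopyFunctors{}(vars);
        copied[0]->coef = 1.f; // independent of vars[0]
    }
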
@@ -297,15 +321,15 @@
{
std::unique_ptr<detail::GLMFunctor<WeightType>> v;
switch (varTypes[f])
{
case ISLDAModel::GLM::linear:
- v = make_unique<detail::LinearFunctor<WeightType>>(this->K, mu[f],
+ v = std::make_unique<detail::LinearFunctor<WeightType>>(this->K, mu[f],
f < glmParam.size() ? glmParam[f] : 1.f);
break;
case ISLDAModel::GLM::binary_logistic:
- v = make_unique<detail::BinaryLogisticFunctor<WeightType>>(this->K, mu[f],
+ v = std::make_unique<detail::BinaryLogisticFunctor<WeightType>>(this->K, mu[f],
f < glmParam.size() ? glmParam[f] : 1.f, this->docs.size());
break;
}
responseVars.emplace_back(std::move(v));
}
@@ -320,26 +344,52 @@
public:
DEFINE_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseClass, 0, F, responseVars, mu, nuSq);
DEFINE_TAGGED_SERIALIZER_AFTER_BASE_WITH_VERSION(BaseClass, 1, 0x00010001, F, responseVars, mu, nuSq);
- SLDAModel(size_t _K = 1, const std::vector<ISLDAModel::GLM>& vars = {},
- Float _alpha = 0.1, Float _eta = 0.01,
- const std::vector<Float>& _mu = {}, const std::vector<Float>& _nuSq = {},
- const std::vector<Float>& _glmParam = {},
- size_t _rg = std::random_device{}())
- : BaseClass(_K, _alpha, _eta, _rg), F(vars.size()), varTypes(vars),
- glmParam(_glmParam)
+ SLDAModel(const SLDAArgs& args)
+ : BaseClass(args), F(args.vars.size()), varTypes(args.vars),
+ glmParam(args.glmParam)
{
for (auto t : varTypes)
{
- if (t != ISLDAModel::GLM::linear && t != ISLDAModel::GLM::binary_logistic) THROW_ERROR_WITH_INFO(std::runtime_error, "unknown var GLM type in 'vars'");
+ if ((size_t)t > (size_t)ISLDAModel::GLM::binary_logistic) THROW_ERROR_WITH_INFO(exc::InvalidArgument, "unknown var GLM type in `vars`");
}
- mu = decltype(mu)::Zero(F);
- std::copy(_mu.begin(), _mu.end(), mu.data());
- nuSq = decltype(nuSq)::Ones(F);
- std::copy(_nuSq.begin(), _nuSq.end(), nuSq.data());
+
+ if (args.mu.size() == 0)
+ {
+ mu = Vector::Zero(F);
+ }
+ else if (args.mu.size() == 1)
+ {
+ mu = Vector::Constant(F, args.mu[0]);
+ }
+ else if (args.mu.size() == F)
+ {
+ mu = Eigen::Map<const Vector>(args.mu.data(), args.mu.size());
+ }
+ else
+ {
+ THROW_ERROR_WITH_INFO(exc::InvalidArgument, text::format("wrong mu value (len = %zd)", args.mu.size()));
+ }
+
+ if (args.nuSq.size() == 0)
+ {
+ nuSq = Vector::Ones(F);
+ }
+ else if (args.nuSq.size() == 1)
+ {
+ nuSq = Vector::Constant(F, args.nuSq[0]);
+ }
+ else if (args.nuSq.size() == F)
+ {
+ nuSq = Eigen::Map<const Vector>(args.nuSq.data(), args.nuSq.size());
+ }
+ else
+ {
+ THROW_ERROR_WITH_INFO(exc::InvalidArgument, text::format("wrong nuSq value (len = %zd)", args.nuSq.size()));
+ }
}
std::vector<Float> getRegressionCoef(size_t f) const override
{
return { responseVars[f]->regressionCoef.data(), responseVars[f]->regressionCoef.data() + this->K };
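
The rewritten constructor takes a single SLDAArgs struct and broadcasts mu and nuSq: an empty vector selects the default, a single value is replicated across all F response variables, a length-F vector is copied as-is, and any other length raises InvalidArgument. A small Eigen sketch of that broadcast rule (the helper name is hypothetical):

    #include <Eigen/Dense>
    #include <stdexcept>
    #include <vector>

    using Vector = Eigen::VectorXf;

    // Assumed rule: empty -> default, one element -> replicated, F elements -> copied.
    Vector broadcastParam(const std::vector<float>& v, Eigen::Index F, float defaultValue)
    {
        if (v.empty()) return Vector::Constant(F, defaultValue);
        if (v.size() == 1) return Vector::Constant(F, v[0]);
        if ((Eigen::Index)v.size() == F)
            return Eigen::Map<const Vector>(v.data(), (Eigen::Index)v.size());
        throw std::invalid_argument("parameter length must be 0, 1, or F");
    }

    int main()
    {
        Vector mu = broadcastParam({}, 3, 0.f);                   // [0, 0, 0]
        Vector nuSq = broadcastParam({ 2.f }, 3, 1.f);            // [2, 2, 2]
        Vector full = broadcastParam({ 1.f, 2.f, 3.f }, 3, 0.f);  // [1, 2, 3]
    }
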
@@ -383,11 +433,11 @@
}
std::unique_ptr<DocumentBase> makeDoc(const RawDoc& rawDoc, const RawDocTokenizer::Factory& tokenizer) const override
{
auto doc = as_mutable(this)->template _makeFromRawDoc<true>(rawDoc, tokenizer);
- return make_unique<_DocType>(as_mutable(this)->template _updateDoc<true>(doc, rawDoc.template getMiscDefault<std::vector<Float>>("y")));
+ return std::make_unique<_DocType>(as_mutable(this)->template _updateDoc<true>(doc, rawDoc.template getMiscDefault<std::vector<Float>>("y")));
}
size_t addDoc(const RawDoc& rawDoc) override
{
auto doc = this->_makeFromRawDoc(rawDoc);
@@ -395,11 +445,11 @@
}
std::unique_ptr<DocumentBase> makeDoc(const RawDoc& rawDoc) const override
{
auto doc = as_mutable(this)->template _makeFromRawDoc<true>(rawDoc);
- return make_unique<_DocType>(as_mutable(this)->template _updateDoc<true>(doc, rawDoc.template getMiscDefault<std::vector<Float>>("y")));
+ return std::make_unique<_DocType>(as_mutable(this)->template _updateDoc<true>(doc, rawDoc.template getMiscDefault<std::vector<Float>>("y")));
}
std::vector<Float> estimateVars(const DocumentBase* doc) const override
{
std::vector<Float> ret;
@@ -422,13 +472,13 @@
else
{
switch ((ISLDAModel::GLM)(t - 1))
{
case ISLDAModel::GLM::linear:
- p = make_unique<LinearFunctor<_WeightType>>();
+ p = std::make_unique<LinearFunctor<_WeightType>>();
break;
case ISLDAModel::GLM::binary_logistic:
- p = make_unique<BinaryLogisticFunctor<_WeightType>>();
+ p = std::make_unique<BinaryLogisticFunctor<_WeightType>>();
break;
default:
throw std::ios_base::failure(text::format("wrong GLMFunctor type id %d", (t - 1)));
}
p->serializerRead(istr);
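
During deserialization the stored tag (t - 1) picks which concrete functor to instantiate before its fields are read back through serializerRead. A generic sketch of that tag-dispatch pattern (the stream format and reader API below are invented for illustration and are not tomoto's serializer):

    #include <istream>
    #include <memory>
    #include <sstream>
    #include <stdexcept>

    struct Functor
    {
        virtual ~Functor() = default;
        virtual void read(std::istream& is) = 0;
    };

    struct Linear : Functor
    {
        float sigmaSq = 1.f;
        void read(std::istream& is) override { is >> sigmaSq; }
    };

    struct BinaryLogistic : Functor
    {
        float b = 1.f;
        void read(std::istream& is) override { is >> b; }
    };

    // First decide the concrete type from the tag, then let the object read its own fields.
    std::unique_ptr<Functor> readFunctor(std::istream& is)
    {
        int tag = 0;
        is >> tag;
        std::unique_ptr<Functor> p;
        switch (tag)
        {
        case 0: p = std::make_unique<Linear>(); break;
        case 1: p = std::make_unique<BinaryLogistic>(); break;
        default: throw std::runtime_error("wrong functor type id");
        }
        p->read(is);
        return p;
    }

    int main()
    {
        std::istringstream iss("1 0.5");
        auto f = readFunctor(iss); // BinaryLogistic with b = 0.5
        (void)f;
    }
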