vendor/umappp/Umap.hpp in umappp-0.1.6 vs vendor/umappp/Umap.hpp in umappp-0.2.0

- old
+ new

@@ -155,18 +155,18 @@ * See `set_seed()`. */ static constexpr uint64_t seed = 1234567890; /** - * See `set_batch()`. + * See `set_num_threads()`. */ - static constexpr bool batch = false; + static constexpr int num_threads = 1; /** - * See `set_num_threads()`. + * See `set_parallel_optimization()`. */ - static constexpr int num_threads = 1; + static constexpr int parallel_optimization = false; }; private: InitMethod init = Defaults::initialize; int num_neighbors = Defaults::num_neighbors; @@ -182,12 +182,12 @@ struct RuntimeParameters { Float a = Defaults::a; Float b = Defaults::b; Float repulsion_strength = Defaults::repulsion_strength; Float learning_rate = Defaults::learning_rate; - bool batch = Defaults::batch; int nthreads = Defaults::num_threads; + bool parallel_optimization = Defaults::parallel_optimization; }; RuntimeParameters rparams; public: @@ -358,32 +358,16 @@ rparams.learning_rate = l; return *this; } /** - * @param b Whether to optimize in batch mode. - * Batch mode is required for effective parallelization via OpenMP but may reduce the stability of the gradient descent. - * - * Batch mode involves computing forces for all observations and applying them simultaneously. - * This is in contrast to the default where the location of observation is updated before the forces are computed for the next observation. - * As each observation's forces are computed independently, batch mode is more amenable to parallelization; - * however, this comes at the cost of stability as the force calculations for later observations are not aware of updates to the positions of earlier observations. - * - * @return A reference to this `Umap` object. - */ - Umap& set_batch(bool b = Defaults::batch) { - rparams.batch = b; - return *this; - } - - /** * @param n Number of threads to use. * * @return A reference to this `Umap` object. 
 * * This setting affects nearest neighbor detection (if an existing list of neighbors is not supplied in `initialize()` or `run()`) and spectral initialization. - * If `set_batch()` is `true`, multiple threads will also be used during layout optimization. + * If `set_parallel_optimization()` is true, it will also affect the layout optimization, i.e., the gradient descent iterations. * * The `UMAPPP_CUSTOM_PARALLEL` macro can be set to a function that specifies a custom parallelization scheme. * This function should be a template that accepts three arguments: * * - `njobs`, an integer specifying the number of jobs. @@ -402,28 +386,84 @@ Umap& set_num_threads(int n = Defaults::num_threads) { rparams.nthreads = n; return *this; } + /** + * @param p Whether to enable parallel optimization. + * If set to `true`, this will use the number of threads specified in `set_num_threads()` for the layout optimization step. + * + * @return A reference to this `Umap` object. + * + * By default, this is set to `false` as the increase in the number of threads is usually not cost-effective for layout optimization. + * Specifically, while CPU usage scales with the number of threads, the time spent does not decrease by the same factor. + * We also expect that the number of available CPUs is at least equal to the requested number of threads, otherwise contention will greatly degrade performance. + * Nonetheless, users can enable parallel optimization if cost is no issue - usually a higher number of threads (above 4) is required to see a reduction in time. + * + * If the `UMAPPP_NO_PARALLEL_OPTIMIZATION` macro is defined, **umappp** will not be compiled with support for parallel optimization. + * This may be desirable in environments that have no support for threading or atomics, or to reduce the binary size if parallelization is not of interest. + * In such cases, enabling parallel optimization and calling `Status::run()` will raise an error.
+ */ + Umap& set_parallel_optimization(bool p = Defaults::parallel_optimization) { + rparams.parallel_optimization = p; + return *this; + } + public: /** * @brief Status of the UMAP optimization iterations. */ struct Status { /** * @cond */ - Status(EpochData<Float> e, uint64_t seed, RuntimeParameters p) : epochs(std::move(e)), engine(seed), rparams(std::move(p)) {} + Status(EpochData<Float> e, uint64_t seed, RuntimeParameters p, int n, Float* embed) : + epochs(std::move(e)), engine(seed), rparams(std::move(p)), ndim_(n), embedding_(embed) {} EpochData<Float> epochs; std::mt19937_64 engine; RuntimeParameters rparams; + int ndim_; + Float* embedding_; /** * @endcond */ /** + * @return Number of dimensions of the embedding. + */ + int ndim() const { + return ndim_; + } + + /** + * @return Pointer to a two-dimensional column-major array where rows are dimensions (`ndim`) and columns are observations. + * This is updated by `initialize()` to store the final embedding. + */ + const Float* embedding() const { + return embedding_; + } + + /** + * @param ptr Pointer to a two-dimensional array as described in `embedding()`. + * @param copy Whether the contents of the previous array should be copied into `ptr`. + * + * By default, the `Status` objects returned by `Umap` methods will operate on embeddings in an array specified at `Status` construction time. + * This method will change the embedding array for an existing `Status` object, which can be helpful in some situations, + * e.g., to clone a `Status` object and to store its embeddings in a different array than the object. + * + * Note that the contents of the new array in `ptr` should be the same as the array that it replaces, as `run()` will continue the iteration from the coordinates inside the array. + * If a copy was already performed from the old array to the new array, the caller may set `copy = false` to avoid an extra copy. 
+ */ + void set_embedding(Float* ptr, bool copy = true) { + if (copy) { + std::copy(embedding_, embedding_ + static_cast<size_t>(ndim()) * nobs(), ptr); + } + embedding_ = ptr; + } + + /** * @return Current epoch. */ int epoch() const { return epochs.current_epoch; } @@ -442,44 +482,44 @@ size_t nobs() const { return epochs.head.size(); } /** - * @param ndim Number of dimensions of the embedding. - * @param[in, out] embedding Two-dimensional array where rows are dimensions (`ndim`) and columns are observations. - * This contains the initial coordinates and is updated to store the final embedding. + * The status of the algorithm and the coordinates in `embedding()` are updated to the specified number of epochs. + * * @param epoch_limit Number of epochs to run to. * The actual number of epochs performed is equal to the difference between `epoch_limit` and the current number of epochs in `epoch()`. - * `epoch_limit` should be not less than `epoch()` and no greater than the maximum number of epochs specified in `Umap::set_num_epochs()`. + * `epoch_limit` should be not less than `epoch()` and be no greater than the maximum number of epochs specified in `Umap::set_num_epochs()`. * If zero, defaults to the maximum number of epochs. - * - * @return The status of the algorithm and the coordinates in `embedding` are updated to the specified number of epochs. 
*/ - void run(int ndim, Float* embedding, int epoch_limit = 0) { - if (!rparams.batch) { + void run(int epoch_limit = 0) { + if (epoch_limit == 0) { + epoch_limit = epochs.total_epochs; + } + + if (rparams.nthreads == 1 || !rparams.parallel_optimization) { optimize_layout( - ndim, - embedding, + ndim_, + embedding_, epochs, rparams.a, rparams.b, rparams.repulsion_strength, rparams.learning_rate, engine, epoch_limit ); } else { - optimize_layout_batched( - ndim, - embedding, + optimize_layout_parallel( + ndim_, + embedding_, epochs, rparams.a, rparams.b, rparams.repulsion_strength, rparams.learning_rate, - [&]() -> auto { return engine(); }, - [](decltype(engine()) s) -> auto { return std::mt19937_64(s); }, + engine, epoch_limit, rparams.nthreads ); } return; @@ -522,11 +562,13 @@ int num_epochs_to_do = choose_num_epochs(num_epochs, x.size()); return Status( similarities_to_epochs(x, num_epochs_to_do, negative_sample_rate), seed, - std::move(pcopy) + std::move(pcopy), + ndim, + embedding ); } public: /** @@ -585,11 +627,11 @@ * This differs from the other `initialize()` methods in that it will internally compute the nearest neighbors for each observation. * It will use vantage point trees for the search - see the other `initialize()` methods to specify a custom search algorithm. */ template<typename Input = Float> Status initialize(int ndim_in, size_t nobs, const Input* input, int ndim_out, Float* embedding) { - knncolle::VpTreeEuclidean<> searcher(ndim_in, nobs, input); + knncolle::VpTreeEuclidean<int, Input, Input, Input> searcher(ndim_in, nobs, input); return initialize(&searcher, ndim_out, embedding); } #endif public: @@ -607,11 +649,11 @@ * `embedding` is updated with the embedding at the specified epoch limit. 
*/ template<class Algorithm> Status run(const Algorithm* searcher, int ndim, Float* embedding, int epoch_limit = 0) { auto status = initialize(searcher, ndim, embedding); - status.run(ndim, embedding, epoch_limit); + status.run(epoch_limit); return status; } /** * @param x Indices and distances to the nearest neighbors for each observation. @@ -625,11 +667,11 @@ * @return The status of the algorithm is returned after running up to `epoch_limit`; this can be used for further iterations by invoking `Status::run()`. * `embedding` is updated with the embedding at the specified epoch limit. */ Status run(NeighborList<Float> x, int ndim, Float* embedding, int epoch_limit = 0) const { auto status = initialize(std::move(x), ndim, embedding); - status.run(ndim, embedding, epoch_limit); + status.run(epoch_limit); return status; } #ifndef UMAPPP_CUSTOM_NEIGHBORS /** @@ -649,10 +691,10 @@ * `embedding` is updated with the embedding at the specified epoch limit. */ template<typename Input = Float> Status run(int ndim_in, size_t nobs, const Input* input, int ndim_out, Float* embedding, int epoch_limit = 0) { auto status = initialize(ndim_in, nobs, input, ndim_out, embedding); - status.run(ndim_out, embedding, epoch_limit); + status.run(epoch_limit); return status; } #endif };