fastText  d00d36476b15
Fast text processing tool/library
productquantizer.h
Go to the documentation of this file.
1 
10 #ifndef FASTTEXT_PRODUCT_QUANTIZER_H
11 #define FASTTEXT_PRODUCT_QUANTIZER_H
12 
13 #include <cstring>
14 #include <istream>
15 #include <ostream>
16 #include <vector>
17 #include <random>
18 
19 #include "real.h"
20 #include "vector.h"
21 
22 namespace fasttext {
23 
24 class ProductQuantizer {
25  private:
26  const int32_t nbits_ = 8;
27  const int32_t ksub_ = 1 << nbits_;
28  const int32_t max_points_per_cluster_ = 256;
29  const int32_t max_points_ = max_points_per_cluster_ * ksub_;
30  const int32_t seed_ = 1234;
31  const int32_t niter_ = 25;
32  const real eps_ = 1e-7;
33 
34  int32_t dim_;
35  int32_t nsubq_;
36  int32_t dsub_;
37  int32_t lastdsub_;
38 
39  std::vector<real> centroids_;
40 
41  std::minstd_rand rng;
42 
43  public:
44  ProductQuantizer() {}
45  ProductQuantizer(int32_t, int32_t);
46 
47  real* get_centroids(int32_t, uint8_t);
48  const real* get_centroids(int32_t, uint8_t) const;
49 
50  real assign_centroid(const real*, const real*, uint8_t*, int32_t) const;
51  void Estep(const real*, const real*, uint8_t*, int32_t, int32_t) const;
52  void MStep(const real*, real*, const uint8_t*, int32_t, int32_t);
53  void kmeans(const real*, real*, int32_t, int32_t);
54  void train(int, const real*);
55 
56  real mulcode(const Vector&, const uint8_t*, int32_t, real) const;
57  void addcode(Vector&, const uint8_t*, int32_t, real) const;
58  void compute_code(const real*, uint8_t*) const;
59  void compute_codes(const real*, uint8_t*, int32_t) const;
60 
61  void save(std::ostream&);
62  void load(std::istream&);
63 };
64 
65 }
66 
67 #endif
real mulcode(const Vector &, const uint8_t *, int32_t, real) const
Definition: productquantizer.cc:149
void MStep(const real *, real *, const uint8_t *, int32_t, int32_t)
Definition: productquantizer.cc:67
int32_t nsubq_
Definition: productquantizer.h:35
void kmeans(const real *, real *, int32_t, int32_t)
Definition: productquantizer.cc:113
void save(std::ostream &)
Definition: productquantizer.cc:192
real assign_centroid(const real *, const real *, uint8_t *, int32_t) const
Definition: productquantizer.cc:43
real * get_centroids(int32_t, uint8_t)
Definition: productquantizer.cc:38
const int32_t seed_
Definition: productquantizer.h:30
fasttext::ProductQuantizer
Definition: productquantizer.h:24
fasttext
Definition: args.cc:17
fasttext::Vector
Definition: vector.h:23
const int32_t niter_
Definition: productquantizer.h:31
int32_t dsub_
Definition: productquantizer.h:36
void addcode(Vector &, const uint8_t *, int32_t, real) const
Definition: productquantizer.cc:164
const real eps_
Definition: productquantizer.h:32
std::minstd_rand rng
Definition: productquantizer.h:41
std::vector< real > centroids_
Definition: productquantizer.h:39
int32_t lastdsub_
Definition: productquantizer.h:37
void compute_codes(const real *, uint8_t *, int32_t) const
Definition: productquantizer.cc:185
int32_t dim_
Definition: productquantizer.h:34
void train(int, const real *)
Definition: productquantizer.cc:128
void compute_code(const real *, uint8_t *) const
Definition: productquantizer.cc:177
const int32_t max_points_
Definition: productquantizer.h:29
const int32_t ksub_
Definition: productquantizer.h:27
const int32_t nbits_
Definition: productquantizer.h:26
const int32_t max_points_per_cluster_
Definition: productquantizer.h:28
float real
Definition: real.h:15
ProductQuantizer()
Definition: productquantizer.h:44
void Estep(const real *, const real *, uint8_t *, int32_t, int32_t) const
Definition: productquantizer.cc:59
void load(std::istream &)
Definition: productquantizer.cc:200