ext/annoy/src/annoylib.h in annoy-rb-0.6.1 vs ext/annoy/src/annoylib.h in annoy-rb-0.7.0

- old
+ new

@@ -11,12 +11,12 @@ // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the // License for the specific language governing permissions and limitations under // the License. -#ifndef ANNOYLIB_H -#define ANNOYLIB_H +#ifndef ANNOY_ANNOYLIB_H +#define ANNOY_ANNOYLIB_H #include <stdio.h> #include <sys/stat.h> #ifndef _MSC_VER #include <unistd.h> @@ -56,10 +56,14 @@ #include <vector> #include <algorithm> #include <queue> #include <limits> +#if __cplusplus >= 201103L +#include <type_traits> +#endif + #ifdef ANNOYLIB_MULTITHREADED_BUILD #include <thread> #include <mutex> #include <shared_mutex> #endif @@ -70,13 +74,13 @@ #endif // This allows others to supply their own logger / error printer without // requiring Annoy to import their headers. See RcppAnnoy for a use case. #ifndef __ERROR_PRINTER_OVERRIDE__ - #define showUpdate(...) { fprintf(stderr, __VA_ARGS__ ); } + #define annoylib_showUpdate(...) { fprintf(stderr, __VA_ARGS__ ); } #else - #define showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); } + #define annoylib_showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); } #endif // Portable alloc definition, cf Writing R Extensions, Section 1.6.4 #ifdef __GNUC__ // Includes GCC, clang and Intel compilers @@ -85,57 +89,60 @@ #elif defined(__sun) || defined(_AIX) // this is necessary (and sufficient) for Solaris 10 and AIX 6: # include <alloca.h> #endif -inline void set_error_from_errno(char **error, const char* msg) { - showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno); - if (error) { - *error = (char *)malloc(256); // TODO: win doesn't support snprintf - sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno); - } -} - -inline void set_error_from_string(char **error, const char* msg) { - showUpdate("%s\n", msg); - if (error) { - *error = (char *)malloc(strlen(msg) + 1); - strcpy(*error, msg); - } -} - // We let the v array in the Node struct take whatever space is needed, so this is a mostly insignificant number. // Compilers need *some* size defined for the v array, and some memory checking tools will flag for buffer overruns if this is set too low. -#define V_ARRAY_SIZE 65536 +#define ANNOYLIB_V_ARRAY_SIZE 65536 #ifndef _MSC_VER -#define popcount __builtin_popcountll +#define annoylib_popcount __builtin_popcountll #else // See #293, #358 -#define popcount cole_popcount +#define annoylib_popcount cole_popcount #endif #if !defined(NO_MANUAL_VECTORIZATION) && defined(__GNUC__) && (__GNUC__ >6) && defined(__AVX512F__) // See #402 -#define USE_AVX512 +#define ANNOYLIB_USE_AVX512 #elif !defined(NO_MANUAL_VECTORIZATION) && defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__) -#define USE_AVX +#define ANNOYLIB_USE_AVX #else #endif -#if defined(USE_AVX) || defined(USE_AVX512) +#if defined(ANNOYLIB_USE_AVX) || defined(ANNOYLIB_USE_AVX512) #if defined(_MSC_VER) #include <intrin.h> #elif defined(__GNUC__) #include <x86intrin.h> #endif #endif #if !defined(__MINGW32__) -#define FTRUNCATE_SIZE(x) static_cast<int64_t>(x) +#define ANNOYLIB_FTRUNCATE_SIZE(x) static_cast<int64_t>(x) #else -#define FTRUNCATE_SIZE(x) (x) +#define ANNOYLIB_FTRUNCATE_SIZE(x) (x) #endif +namespace Annoy { + +inline void set_error_from_errno(char **error, const char* msg) { + annoylib_showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno); + if (error) { + *error = (char *)malloc(256); // TODO: win doesn't support snprintf + sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno); + } +} + +inline void set_error_from_string(char **error, const char* msg) { + annoylib_showUpdate("%s\n", msg); + if (error) { + *error = (char *)malloc(strlen(msg) + 1); + strcpy(*error, msg); + } +} + + using std::vector; using std::pair; using std::numeric_limits; using std::make_pair; @@ -143,11 +150,11 @@ #ifdef __linux__ *_ptr = mremap(*_ptr, old_size, new_size, MREMAP_MAYMOVE); bool ok = ftruncate(_fd, new_size) != -1; #else munmap(*_ptr, old_size); - bool ok = ftruncate(_fd, FTRUNCATE_SIZE(new_size)) != -1; + bool ok = ftruncate(_fd, ANNOYLIB_FTRUNCATE_SIZE(new_size)) != -1; #ifdef MAP_POPULATE *_ptr = mmap(*_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, _fd, 0); #else *_ptr = mmap(*_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, _fd, 0); #endif @@ -192,11 +199,11 @@ ++y; } return d; } -#ifdef USE_AVX +#ifdef ANNOYLIB_USE_AVX // Horizontal single sum of 256bit vector. inline float hsum256_ps_avx(__m256 v) { const __m128 x128 = _mm_add_ps(_mm256_extractf128_ps(v, 1), _mm256_castps256_ps128(v)); const __m128 x64 = _mm_add_ps(x128, _mm_movehl_ps(x128, x128)); const __m128 x32 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55)); @@ -275,11 +282,11 @@ return result; } #endif -#ifdef USE_AVX512 +#ifdef ANNOYLIB_USE_AVX512 template<> inline float dot<float>(const float* x, const float *y, int f) { float result = 0; if (f > 15) { __m512 d = _mm512_setzero_ps(); @@ -450,11 +457,11 @@ S n_descendants; union { S children[2]; // Will possibly store more than 2 T norm; }; - T v[V_ARRAY_SIZE]; + T v[ANNOYLIB_V_ARRAY_SIZE]; }; template<typename S, typename T> static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) { // want to calculate (a/|a| - b/|b|)^2 // = a^2 / a^2 + b^2 / b^2 - 2ab/|a||b| @@ -521,11 +528,11 @@ * This is an extension of the Angular node with an extra attribute for the scaled norm. */ S n_descendants; S children[2]; // Will possibly store more than 2 T dot_factor; - T v[V_ARRAY_SIZE]; + T v[ANNOYLIB_V_ARRAY_SIZE]; }; static const char* name() { return "dot"; } @@ -628,11 +635,11 @@ struct Hamming : Base { template<typename S, typename T> struct Node { S n_descendants; S children[2]; - T v[V_ARRAY_SIZE]; + T v[ANNOYLIB_V_ARRAY_SIZE]; }; static const size_t max_iterations = 20; template<typename T> @@ -657,11 +664,11 @@ } template<typename S, typename T> static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) { size_t dist = 0; for (int i = 0; i < f; i++) { - dist += popcount(x->v[i] ^ y->v[i]); + dist += annoylib_popcount(x->v[i] ^ y->v[i]); } return dist; } template<typename S, typename T> static inline bool margin(const Node<S, T>* n, const T* y, int f) { @@ -725,11 +732,11 @@ template<typename S, typename T> struct Node { S n_descendants; T a; // need an extra constant term to determine the offset of the plane S children[2]; - T v[V_ARRAY_SIZE]; + T v[ANNOYLIB_V_ARRAY_SIZE]; }; template<typename S, typename T> static inline T margin(const Node<S, T>* n, const T* y, int f) { return n->a + dot(n->v, y, f); } @@ -813,11 +820,11 @@ static const char* name() { return "manhattan"; } }; -template<typename S, typename T> +template<typename S, typename T, typename R = uint64_t> class AnnoyIndexInterface { public: // Note that the methods with an **error argument will allocate memory and write the pointer to that string if error is non-NULL virtual ~AnnoyIndexInterface() {}; virtual bool add_item(S item, const T* w, char** error=NULL) = 0; @@ -831,48 +838,58 @@ virtual void get_nns_by_vector(const T* w, size_t n, int search_k, vector<S>* result, vector<T>* distances) const = 0; virtual S get_n_items() const = 0; virtual S get_n_trees() const = 0; virtual void verbose(bool v) = 0; virtual void get_item(S item, T* v) const = 0; - virtual void set_seed(int q) = 0; + virtual void set_seed(R q) = 0; virtual bool on_disk_build(const char* filename, char** error=NULL) = 0; }; template<typename S, typename T, typename Distance, typename Random, class ThreadedBuildPolicy> - class AnnoyIndex : public AnnoyIndexInterface<S, T> { + class AnnoyIndex : public AnnoyIndexInterface<S, T, +#if __cplusplus >= 201103L + typename std::remove_const<decltype(Random::default_seed)>::type +#else + typename Random::seed_type +#endif + > { /* * We use random projection to build a forest of binary trees of all items. * Basically just split the hyperspace into two sides by a hyperplane, * then recursively split each of those subtrees etc. * We create a tree like this q times. The default q is determined automatically * in such a way that we at most use 2x as much memory as the vectors take. */ public: typedef Distance D; typedef typename D::template Node<S, T> Node; +#if __cplusplus >= 201103L + typedef typename std::remove_const<decltype(Random::default_seed)>::type R; +#else + typedef typename Random::seed_type R; +#endif protected: const int _f; size_t _s; S _n_items; void* _nodes; // Could either be mmapped, or point to a memory buffer that we reallocate S _n_nodes; S _nodes_size; vector<S> _roots; S _K; - bool _is_seeded; - int _seed; + R _seed; bool _loaded; bool _verbose; int _fd; bool _on_disk; bool _built; public: AnnoyIndex() : _f(0), _fd(0), _nodes(NULL), _n_items(0), _n_nodes(0), _nodes_size(0), - _is_seeded(false), _loaded(false), _verbose(false), _on_disk(false), _built(false) { } - AnnoyIndex(int f) : _f(f) { + _loaded(false), _verbose(false), _on_disk(false), _built(false) { } + AnnoyIndex(int f) : _f(f), _seed(Random::default_seed) { _s = offsetof(Node, v) + _f * sizeof(T); // Size of each node _verbose = false; _built = false; _K = (S) (((size_t) (_s - offsetof(Node, children))) / sizeof(S)); // Max number of descendants to fit into node reinitialize(); // Reset everything @@ -922,11 +939,11 @@ set_error_from_errno(error, "Unable to open"); _fd = 0; return false; } _nodes_size = 1; - if (ftruncate(_fd, FTRUNCATE_SIZE(_s) * FTRUNCATE_SIZE(_nodes_size)) == -1) { + if (ftruncate(_fd, ANNOYLIB_FTRUNCATE_SIZE(_s) * ANNOYLIB_FTRUNCATE_SIZE(_nodes_size)) == -1) { set_error_from_errno(error, "Unable to truncate"); return false; } #ifdef MAP_POPULATE _nodes = (Node*) mmap(0, _s * _nodes_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, _fd, 0); @@ -958,11 +975,11 @@ _allocate_size(_n_nodes + (S)_roots.size()); for (size_t i = 0; i < _roots.size(); i++) memcpy(_get(_n_nodes + (S)i), _get(_roots[i]), _s); _n_nodes += _roots.size(); - if (_verbose) showUpdate("has %d nodes\n", _n_nodes); + if (_verbose) annoylib_showUpdate("has %d nodes\n", _n_nodes); if (_on_disk) { if (!remap_memory_and_truncate(&_nodes, _fd, static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size), static_cast<size_t>(_s) * static_cast<size_t>(_n_nodes))) { @@ -1027,11 +1044,11 @@ _loaded = false; _n_items = 0; _n_nodes = 0; _nodes_size = 0; _on_disk = false; - _is_seeded = false; + _seed = Random::default_seed; _roots.clear(); } void unload() { if (_on_disk && _fd) { @@ -1046,11 +1063,11 @@ // We have heap allocated data free(_nodes); } } reinitialize(); - if (_verbose) showUpdate("unloaded\n"); + if (_verbose) annoylib_showUpdate("unloaded\n"); } bool load(const char* filename, bool prefault=false, char** error=NULL) { _fd = open(filename, O_RDONLY, (int)0400); if (_fd == -1) { @@ -1074,11 +1091,11 @@ int flags = MAP_SHARED; if (prefault) { #ifdef MAP_POPULATE flags |= MAP_POPULATE; #else - showUpdate("prefault is set to true, but MAP_POPULATE is not defined on this platform"); + annoylib_showUpdate("prefault is set to true, but MAP_POPULATE is not defined on this platform"); #endif } _nodes = (Node*)mmap(0, size, PROT_READ, flags, _fd, 0); _n_nodes = (S)(size / _s); @@ -1098,11 +1115,11 @@ if (_roots.size() > 1 && _get(_roots.front())->children[0] == _get(_roots.back())->children[0]) _roots.pop_back(); _loaded = true; _built = true; _n_items = m; - if (_verbose) showUpdate("found %lu roots with degree %d\n", _roots.size(), m); + if (_verbose) annoylib_showUpdate("found %lu roots with degree %d\n", _roots.size(), m); return true; } T get_distance(S i, S j) const { return D::normalized_distance(D::distance(_get(i), _get(j), _f)); @@ -1134,20 +1151,17 @@ // TODO: handle OOB Node* m = _get(item); memcpy(v, m->v, (_f) * sizeof(T)); } - void set_seed(int seed) { - _is_seeded = true; + void set_seed(R seed) { _seed = seed; } void thread_build(int q, int thread_idx, ThreadedBuildPolicy& threaded_build_policy) { - Random _random; // Each thread needs its own seed, otherwise each thread would be building the same tree(s) - int seed = _is_seeded ? _seed + thread_idx : thread_idx; - _random.set_seed(seed); + Random _random(_seed + thread_idx); vector<S> thread_roots; while (1) { if (q == -1) { threaded_build_policy.lock_n_nodes(); @@ -1160,11 +1174,11 @@ if (thread_roots.size() >= (size_t)q) { break; } } - if (_verbose) showUpdate("pass %zd...\n", thread_roots.size()); + if (_verbose) annoylib_showUpdate("pass %zd...\n", thread_roots.size()); vector<S> indices; threaded_build_policy.lock_shared_nodes(); for (S i = 0; i < _n_items; i++) { if (_get(i)->n_descendants >= 1) { // Issue #223 @@ -1190,18 +1204,18 @@ if (_on_disk) { if (!remap_memory_and_truncate(&_nodes, _fd, static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size), static_cast<size_t>(_s) * static_cast<size_t>(new_nodes_size)) && _verbose) - showUpdate("File truncation error\n"); + annoylib_showUpdate("File truncation error\n"); } else { _nodes = realloc(_nodes, _s * new_nodes_size); memset((char *) _nodes + (_nodes_size * _s) / sizeof(char), 0, (new_nodes_size - _nodes_size) * _s); } _nodes_size = new_nodes_size; - if (_verbose) showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes); + if (_verbose) annoylib_showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes); } void _allocate_size(S n, ThreadedBuildPolicy& threaded_build_policy) { if (n > _nodes_size) { threaded_build_policy.lock_nodes(); @@ -1279,11 +1293,11 @@ Node* n = _get(j); if (n) { bool side = D::side(m, n->v, _f, _random); children_indices[side].push_back(j); } else { - showUpdate("No node for index %d?\n", j); + annoylib_showUpdate("No node for index %d?\n", j); } } if (_split_imbalance(children_indices[0], children_indices[1]) < 0.95) break; @@ -1291,11 +1305,11 @@ threaded_build_policy.unlock_shared_nodes(); // If we didn't find a hyperplane, just randomize sides as a last option while (_split_imbalance(children_indices[0], children_indices[1]) > 0.99) { if (_verbose) - showUpdate("\tNo hyperplane found (left has %ld children, right has %ld children)\n", + annoylib_showUpdate("\tNo hyperplane found (left has %ld children, right has %ld children)\n", children_indices[0].size(), children_indices[1].size()); children_indices[0].clear(); children_indices[1].clear(); @@ -1474,8 +1488,10 @@ void unlock_roots() { roots_mutex.unlock(); } }; #endif + +} #endif // vim: tabstop=2 shiftwidth=2