ext/annoy/src/annoylib.h in annoy-rb-0.6.1 vs ext/annoy/src/annoylib.h in annoy-rb-0.7.0
- old
+ new
@@ -11,12 +11,12 @@
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.
-#ifndef ANNOYLIB_H
-#define ANNOYLIB_H
+#ifndef ANNOY_ANNOYLIB_H
+#define ANNOY_ANNOYLIB_H
#include <stdio.h>
#include <sys/stat.h>
#ifndef _MSC_VER
#include <unistd.h>
@@ -56,10 +56,14 @@
#include <vector>
#include <algorithm>
#include <queue>
#include <limits>
+#if __cplusplus >= 201103L
+#include <type_traits>
+#endif
+
#ifdef ANNOYLIB_MULTITHREADED_BUILD
#include <thread>
#include <mutex>
#include <shared_mutex>
#endif
@@ -70,13 +74,13 @@
#endif
// This allows others to supply their own logger / error printer without
// requiring Annoy to import their headers. See RcppAnnoy for a use case.
#ifndef __ERROR_PRINTER_OVERRIDE__
- #define showUpdate(...) { fprintf(stderr, __VA_ARGS__ ); }
+ #define annoylib_showUpdate(...) { fprintf(stderr, __VA_ARGS__ ); }
#else
- #define showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); }
+ #define annoylib_showUpdate(...) { __ERROR_PRINTER_OVERRIDE__( __VA_ARGS__ ); }
#endif
// Portable alloc definition, cf Writing R Extensions, Section 1.6.4
#ifdef __GNUC__
// Includes GCC, clang and Intel compilers
@@ -85,57 +89,60 @@
#elif defined(__sun) || defined(_AIX)
// this is necessary (and sufficient) for Solaris 10 and AIX 6:
# include <alloca.h>
#endif
-inline void set_error_from_errno(char **error, const char* msg) {
- showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno);
- if (error) {
- *error = (char *)malloc(256); // TODO: win doesn't support snprintf
- sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno);
- }
-}
-
-inline void set_error_from_string(char **error, const char* msg) {
- showUpdate("%s\n", msg);
- if (error) {
- *error = (char *)malloc(strlen(msg) + 1);
- strcpy(*error, msg);
- }
-}
-
// We let the v array in the Node struct take whatever space is needed, so this is a mostly insignificant number.
// Compilers need *some* size defined for the v array, and some memory checking tools will flag for buffer overruns if this is set too low.
-#define V_ARRAY_SIZE 65536
+#define ANNOYLIB_V_ARRAY_SIZE 65536
#ifndef _MSC_VER
-#define popcount __builtin_popcountll
+#define annoylib_popcount __builtin_popcountll
#else // See #293, #358
-#define popcount cole_popcount
+#define annoylib_popcount cole_popcount
#endif
#if !defined(NO_MANUAL_VECTORIZATION) && defined(__GNUC__) && (__GNUC__ >6) && defined(__AVX512F__) // See #402
-#define USE_AVX512
+#define ANNOYLIB_USE_AVX512
#elif !defined(NO_MANUAL_VECTORIZATION) && defined(__AVX__) && defined (__SSE__) && defined(__SSE2__) && defined(__SSE3__)
-#define USE_AVX
+#define ANNOYLIB_USE_AVX
#else
#endif
-#if defined(USE_AVX) || defined(USE_AVX512)
+#if defined(ANNOYLIB_USE_AVX) || defined(ANNOYLIB_USE_AVX512)
#if defined(_MSC_VER)
#include <intrin.h>
#elif defined(__GNUC__)
#include <x86intrin.h>
#endif
#endif
#if !defined(__MINGW32__)
-#define FTRUNCATE_SIZE(x) static_cast<int64_t>(x)
+#define ANNOYLIB_FTRUNCATE_SIZE(x) static_cast<int64_t>(x)
#else
-#define FTRUNCATE_SIZE(x) (x)
+#define ANNOYLIB_FTRUNCATE_SIZE(x) (x)
#endif
+namespace Annoy {
+
+inline void set_error_from_errno(char **error, const char* msg) {
+ annoylib_showUpdate("%s: %s (%d)\n", msg, strerror(errno), errno);
+ if (error) {
+ *error = (char *)malloc(256); // TODO: win doesn't support snprintf
+ sprintf(*error, "%s: %s (%d)", msg, strerror(errno), errno);
+ }
+}
+
+inline void set_error_from_string(char **error, const char* msg) {
+ annoylib_showUpdate("%s\n", msg);
+ if (error) {
+ *error = (char *)malloc(strlen(msg) + 1);
+ strcpy(*error, msg);
+ }
+}
+
+
using std::vector;
using std::pair;
using std::numeric_limits;
using std::make_pair;
@@ -143,11 +150,11 @@
#ifdef __linux__
*_ptr = mremap(*_ptr, old_size, new_size, MREMAP_MAYMOVE);
bool ok = ftruncate(_fd, new_size) != -1;
#else
munmap(*_ptr, old_size);
- bool ok = ftruncate(_fd, FTRUNCATE_SIZE(new_size)) != -1;
+ bool ok = ftruncate(_fd, ANNOYLIB_FTRUNCATE_SIZE(new_size)) != -1;
#ifdef MAP_POPULATE
*_ptr = mmap(*_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, _fd, 0);
#else
*_ptr = mmap(*_ptr, new_size, PROT_READ | PROT_WRITE, MAP_SHARED, _fd, 0);
#endif
@@ -192,11 +199,11 @@
++y;
}
return d;
}
-#ifdef USE_AVX
+#ifdef ANNOYLIB_USE_AVX
// Horizontal single sum of 256bit vector.
inline float hsum256_ps_avx(__m256 v) {
const __m128 x128 = _mm_add_ps(_mm256_extractf128_ps(v, 1), _mm256_castps256_ps128(v));
const __m128 x64 = _mm_add_ps(x128, _mm_movehl_ps(x128, x128));
const __m128 x32 = _mm_add_ss(x64, _mm_shuffle_ps(x64, x64, 0x55));
@@ -275,11 +282,11 @@
return result;
}
#endif
-#ifdef USE_AVX512
+#ifdef ANNOYLIB_USE_AVX512
template<>
inline float dot<float>(const float* x, const float *y, int f) {
float result = 0;
if (f > 15) {
__m512 d = _mm512_setzero_ps();
@@ -450,11 +457,11 @@
S n_descendants;
union {
S children[2]; // Will possibly store more than 2
T norm;
};
- T v[V_ARRAY_SIZE];
+ T v[ANNOYLIB_V_ARRAY_SIZE];
};
template<typename S, typename T>
static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
// want to calculate (a/|a| - b/|b|)^2
// = a^2 / a^2 + b^2 / b^2 - 2ab/|a||b|
@@ -521,11 +528,11 @@
* This is an extension of the Angular node with an extra attribute for the scaled norm.
*/
S n_descendants;
S children[2]; // Will possibly store more than 2
T dot_factor;
- T v[V_ARRAY_SIZE];
+ T v[ANNOYLIB_V_ARRAY_SIZE];
};
static const char* name() {
return "dot";
}
@@ -628,11 +635,11 @@
struct Hamming : Base {
template<typename S, typename T>
struct Node {
S n_descendants;
S children[2];
- T v[V_ARRAY_SIZE];
+ T v[ANNOYLIB_V_ARRAY_SIZE];
};
static const size_t max_iterations = 20;
template<typename T>
@@ -657,11 +664,11 @@
}
template<typename S, typename T>
static inline T distance(const Node<S, T>* x, const Node<S, T>* y, int f) {
size_t dist = 0;
for (int i = 0; i < f; i++) {
- dist += popcount(x->v[i] ^ y->v[i]);
+ dist += annoylib_popcount(x->v[i] ^ y->v[i]);
}
return dist;
}
template<typename S, typename T>
static inline bool margin(const Node<S, T>* n, const T* y, int f) {
@@ -725,11 +732,11 @@
template<typename S, typename T>
struct Node {
S n_descendants;
T a; // need an extra constant term to determine the offset of the plane
S children[2];
- T v[V_ARRAY_SIZE];
+ T v[ANNOYLIB_V_ARRAY_SIZE];
};
template<typename S, typename T>
static inline T margin(const Node<S, T>* n, const T* y, int f) {
return n->a + dot(n->v, y, f);
}
@@ -813,11 +820,11 @@
static const char* name() {
return "manhattan";
}
};
-template<typename S, typename T>
+template<typename S, typename T, typename R = uint64_t>
class AnnoyIndexInterface {
public:
// Note that the methods with an **error argument will allocate memory and write the pointer to that string if error is non-NULL
virtual ~AnnoyIndexInterface() {};
virtual bool add_item(S item, const T* w, char** error=NULL) = 0;
@@ -831,48 +838,58 @@
virtual void get_nns_by_vector(const T* w, size_t n, int search_k, vector<S>* result, vector<T>* distances) const = 0;
virtual S get_n_items() const = 0;
virtual S get_n_trees() const = 0;
virtual void verbose(bool v) = 0;
virtual void get_item(S item, T* v) const = 0;
- virtual void set_seed(int q) = 0;
+ virtual void set_seed(R q) = 0;
virtual bool on_disk_build(const char* filename, char** error=NULL) = 0;
};
template<typename S, typename T, typename Distance, typename Random, class ThreadedBuildPolicy>
- class AnnoyIndex : public AnnoyIndexInterface<S, T> {
+ class AnnoyIndex : public AnnoyIndexInterface<S, T,
+#if __cplusplus >= 201103L
+ typename std::remove_const<decltype(Random::default_seed)>::type
+#else
+ typename Random::seed_type
+#endif
+ > {
/*
* We use random projection to build a forest of binary trees of all items.
* Basically just split the hyperspace into two sides by a hyperplane,
* then recursively split each of those subtrees etc.
* We create a tree like this q times. The default q is determined automatically
* in such a way that we at most use 2x as much memory as the vectors take.
*/
public:
typedef Distance D;
typedef typename D::template Node<S, T> Node;
+#if __cplusplus >= 201103L
+ typedef typename std::remove_const<decltype(Random::default_seed)>::type R;
+#else
+ typedef typename Random::seed_type R;
+#endif
protected:
const int _f;
size_t _s;
S _n_items;
void* _nodes; // Could either be mmapped, or point to a memory buffer that we reallocate
S _n_nodes;
S _nodes_size;
vector<S> _roots;
S _K;
- bool _is_seeded;
- int _seed;
+ R _seed;
bool _loaded;
bool _verbose;
int _fd;
bool _on_disk;
bool _built;
public:
AnnoyIndex() : _f(0), _fd(0), _nodes(NULL), _n_items(0), _n_nodes(0), _nodes_size(0),
- _is_seeded(false), _loaded(false), _verbose(false), _on_disk(false), _built(false) { }
- AnnoyIndex(int f) : _f(f) {
+ _loaded(false), _verbose(false), _on_disk(false), _built(false) { }
+ AnnoyIndex(int f) : _f(f), _seed(Random::default_seed) {
_s = offsetof(Node, v) + _f * sizeof(T); // Size of each node
_verbose = false;
_built = false;
_K = (S) (((size_t) (_s - offsetof(Node, children))) / sizeof(S)); // Max number of descendants to fit into node
reinitialize(); // Reset everything
@@ -922,11 +939,11 @@
set_error_from_errno(error, "Unable to open");
_fd = 0;
return false;
}
_nodes_size = 1;
- if (ftruncate(_fd, FTRUNCATE_SIZE(_s) * FTRUNCATE_SIZE(_nodes_size)) == -1) {
+ if (ftruncate(_fd, ANNOYLIB_FTRUNCATE_SIZE(_s) * ANNOYLIB_FTRUNCATE_SIZE(_nodes_size)) == -1) {
set_error_from_errno(error, "Unable to truncate");
return false;
}
#ifdef MAP_POPULATE
_nodes = (Node*) mmap(0, _s * _nodes_size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, _fd, 0);
@@ -958,11 +975,11 @@
_allocate_size(_n_nodes + (S)_roots.size());
for (size_t i = 0; i < _roots.size(); i++)
memcpy(_get(_n_nodes + (S)i), _get(_roots[i]), _s);
_n_nodes += _roots.size();
- if (_verbose) showUpdate("has %d nodes\n", _n_nodes);
+ if (_verbose) annoylib_showUpdate("has %d nodes\n", _n_nodes);
if (_on_disk) {
if (!remap_memory_and_truncate(&_nodes, _fd,
static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size),
static_cast<size_t>(_s) * static_cast<size_t>(_n_nodes))) {
@@ -1027,11 +1044,11 @@
_loaded = false;
_n_items = 0;
_n_nodes = 0;
_nodes_size = 0;
_on_disk = false;
- _is_seeded = false;
+ _seed = Random::default_seed;
_roots.clear();
}
void unload() {
if (_on_disk && _fd) {
@@ -1046,11 +1063,11 @@
// We have heap allocated data
free(_nodes);
}
}
reinitialize();
- if (_verbose) showUpdate("unloaded\n");
+ if (_verbose) annoylib_showUpdate("unloaded\n");
}
bool load(const char* filename, bool prefault=false, char** error=NULL) {
_fd = open(filename, O_RDONLY, (int)0400);
if (_fd == -1) {
@@ -1074,11 +1091,11 @@
int flags = MAP_SHARED;
if (prefault) {
#ifdef MAP_POPULATE
flags |= MAP_POPULATE;
#else
- showUpdate("prefault is set to true, but MAP_POPULATE is not defined on this platform");
+ annoylib_showUpdate("prefault is set to true, but MAP_POPULATE is not defined on this platform");
#endif
}
_nodes = (Node*)mmap(0, size, PROT_READ, flags, _fd, 0);
_n_nodes = (S)(size / _s);
@@ -1098,11 +1115,11 @@
if (_roots.size() > 1 && _get(_roots.front())->children[0] == _get(_roots.back())->children[0])
_roots.pop_back();
_loaded = true;
_built = true;
_n_items = m;
- if (_verbose) showUpdate("found %lu roots with degree %d\n", _roots.size(), m);
+ if (_verbose) annoylib_showUpdate("found %lu roots with degree %d\n", _roots.size(), m);
return true;
}
T get_distance(S i, S j) const {
return D::normalized_distance(D::distance(_get(i), _get(j), _f));
@@ -1134,20 +1151,17 @@
// TODO: handle OOB
Node* m = _get(item);
memcpy(v, m->v, (_f) * sizeof(T));
}
- void set_seed(int seed) {
- _is_seeded = true;
+ void set_seed(R seed) {
_seed = seed;
}
void thread_build(int q, int thread_idx, ThreadedBuildPolicy& threaded_build_policy) {
- Random _random;
// Each thread needs its own seed, otherwise each thread would be building the same tree(s)
- int seed = _is_seeded ? _seed + thread_idx : thread_idx;
- _random.set_seed(seed);
+ Random _random(_seed + thread_idx);
vector<S> thread_roots;
while (1) {
if (q == -1) {
threaded_build_policy.lock_n_nodes();
@@ -1160,11 +1174,11 @@
if (thread_roots.size() >= (size_t)q) {
break;
}
}
- if (_verbose) showUpdate("pass %zd...\n", thread_roots.size());
+ if (_verbose) annoylib_showUpdate("pass %zd...\n", thread_roots.size());
vector<S> indices;
threaded_build_policy.lock_shared_nodes();
for (S i = 0; i < _n_items; i++) {
if (_get(i)->n_descendants >= 1) { // Issue #223
@@ -1190,18 +1204,18 @@
if (_on_disk) {
if (!remap_memory_and_truncate(&_nodes, _fd,
static_cast<size_t>(_s) * static_cast<size_t>(_nodes_size),
static_cast<size_t>(_s) * static_cast<size_t>(new_nodes_size)) &&
_verbose)
- showUpdate("File truncation error\n");
+ annoylib_showUpdate("File truncation error\n");
} else {
_nodes = realloc(_nodes, _s * new_nodes_size);
memset((char *) _nodes + (_nodes_size * _s) / sizeof(char), 0, (new_nodes_size - _nodes_size) * _s);
}
_nodes_size = new_nodes_size;
- if (_verbose) showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes);
+ if (_verbose) annoylib_showUpdate("Reallocating to %d nodes: old_address=%p, new_address=%p\n", new_nodes_size, old, _nodes);
}
void _allocate_size(S n, ThreadedBuildPolicy& threaded_build_policy) {
if (n > _nodes_size) {
threaded_build_policy.lock_nodes();
@@ -1279,11 +1293,11 @@
Node* n = _get(j);
if (n) {
bool side = D::side(m, n->v, _f, _random);
children_indices[side].push_back(j);
} else {
- showUpdate("No node for index %d?\n", j);
+ annoylib_showUpdate("No node for index %d?\n", j);
}
}
if (_split_imbalance(children_indices[0], children_indices[1]) < 0.95)
break;
@@ -1291,11 +1305,11 @@
threaded_build_policy.unlock_shared_nodes();
// If we didn't find a hyperplane, just randomize sides as a last option
while (_split_imbalance(children_indices[0], children_indices[1]) > 0.99) {
if (_verbose)
- showUpdate("\tNo hyperplane found (left has %ld children, right has %ld children)\n",
+ annoylib_showUpdate("\tNo hyperplane found (left has %ld children, right has %ld children)\n",
children_indices[0].size(), children_indices[1].size());
children_indices[0].clear();
children_indices[1].clear();
@@ -1474,8 +1488,10 @@
void unlock_roots() {
roots_mutex.unlock();
}
};
#endif
+
+}
#endif
// vim: tabstop=2 shiftwidth=2