/* Isolation forests and variations thereof, with adjustments for incorporation * of categorical variables and missing values. * Written for C++11 standard and aimed at being used in R and Python. * * This library is based on the following works: * [1] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou. * "Isolation forest." * 2008 Eighth IEEE International Conference on Data Mining. IEEE, 2008. * [2] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou. * "Isolation-based anomaly detection." * ACM Transactions on Knowledge Discovery from Data (TKDD) 6.1 (2012): 3. * [3] Hariri, Sahand, Matias Carrasco Kind, and Robert J. Brunner. * "Extended Isolation Forest." * arXiv preprint arXiv:1811.02141 (2018). * [4] Liu, Fei Tony, Kai Ming Ting, and Zhi-Hua Zhou. * "On detecting clustered anomalies using SCiForest." * Joint European Conference on Machine Learning and Knowledge Discovery in Databases. Springer, Berlin, Heidelberg, 2010. * [5] https://sourceforge.net/projects/iforest/ * [6] https://math.stackexchange.com/questions/3388518/expected-number-of-paths-required-to-separate-elements-in-a-binary-tree * [7] Quinlan, J. Ross. C4.5: programs for machine learning. Elsevier, 2014. * [8] Cortes, David. * "Distance approximation using Isolation Forests." * arXiv preprint arXiv:1910.12362 (2019). * [9] Cortes, David. * "Imputing missing values with unsupervised random trees." * arXiv preprint arXiv:1911.06646 (2019). * [10] https://math.stackexchange.com/questions/3333220/expected-average-depth-in-random-binary-tree-constructed-top-to-bottom * [11] Cortes, David. * "Revisiting randomized choices in isolation forests." * arXiv preprint arXiv:2110.13402 (2021). * [12] Guha, Sudipto, et al. * "Robust random cut forest based anomaly detection on streams." * International conference on machine learning. PMLR, 2016. * [13] Cortes, David. * "Isolation forests: looking beyond tree depth." * arXiv preprint arXiv:2111.11639 (2021). * [14] Ting, Kai Ming, Yue Zhu, and Zhi-Hua Zhou. 
* "Isolation kernel and its effect on SVM" * Proceedings of the 24th ACM SIGKDD * International Conference on Knowledge Discovery & Data Mining. 2018. * * BSD 2-Clause License * Copyright (c) 2019-2022, David Cortes * All rights reserved. * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* Note: the R and Python versions call the 'sort_csc_indices' templated function, so it's not enough to just include 'isotree_exportable.hpp' under them and let this same file instantiate all supported templated types. Also, Cython makes it hard to use overloaded functions since they have to be declared multiple times. 
*/ #if !defined(_FOR_R) && !defined(_FOR_PYTHON) #include "headers_joined.hpp" #define real_t double #define sparse_ix int #include "instantiate_template_headers.hpp" #undef real_t #undef sparse_ix #ifndef NO_TEMPLATED_VERSIONS #define real_t double #define sparse_ix int64_t #include "instantiate_template_headers.hpp" #undef real_t #undef sparse_ix #define real_t double #define sparse_ix size_t #include "instantiate_template_headers.hpp" #undef real_t #undef sparse_ix #define _NO_REAL_T #define real_t float #define sparse_ix int #include "instantiate_template_headers.hpp" #undef real_t #undef sparse_ix #define real_t float #define sparse_ix int64_t #include "instantiate_template_headers.hpp" #undef real_t #undef sparse_ix #define real_t float #define sparse_ix size_t #include "instantiate_template_headers.hpp" #undef real_t #undef sparse_ix #undef _NO_REAL_T #endif /* NO_TEMPLATED_VERSIONS */ #endif