vendor/isotree/src/Rwrapper.cpp in isotree-0.3.0 vs vendor/isotree/src/Rwrapper.cpp in isotree-0.3.1

- old
+ new

@@ -38,11 +38,11 @@ * "Isolation kernel and its effect on SVM" * Proceedings of the 24th ACM SIGKDD * International Conference on Knowledge Discovery & Data Mining. 2018. * * BSD 2-Clause License -* Copyright (c) 2019-2022, David Cortes +* Copyright (c) 2019-2023, David Cortes * All rights reserved. * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright notice, this * list of conditions and the following disclaimer. @@ -62,16 +62,14 @@ */ #ifdef _FOR_R #include <Rcpp.h> #include <Rcpp/unwindProtect.h> -// [[Rcpp::plugins(unwindProtect)]] #include <Rinternals.h> +#include <R_ext/Altrep.h> -#ifndef _FOR_R -#define FOR_R -#endif +#include <type_traits> /* This is the package's header */ #include "isotree.hpp" /* Library is templated, base R comes with only these 2 types though */ @@ -85,11 +83,11 @@ /* Note: the R version calls the 'sort_csc_indices' templated function, so it's not enough to just include 'isotree_exportable.hpp' and let the templates be instantiated elsewhere. */ -#define throw_mem_err() Rcpp::stop("Error: insufficient memory. Try smaller sample sizes and fewer trees.\n") +#define throw_mem_err() throw Rcpp::exception("Error: insufficient memory. Try smaller sample sizes and fewer trees.\n") SEXP alloc_RawVec(void *data) { size_t vecsize = *(size_t*)data; if (unlikely(vecsize > (size_t)std::numeric_limits<R_xlen_t>::max())) @@ -146,13 +144,13 @@ template <class Model> Rcpp::RawVector serialize_cpp_obj(const Model *model_outputs) { size_t serialized_size = determine_serialized_size(*model_outputs); if (unlikely(!serialized_size)) - Rcpp::stop("Unexpected error."); + throw Rcpp::exception("Unexpected error."); if (unlikely(serialized_size > (size_t)std::numeric_limits<R_xlen_t>::max())) - Rcpp::stop("Resulting model is too large for R to handle."); + throw Rcpp::exception("Resulting model is too large for R to handle."); Rcpp::RawVector out = Rcpp::unwindProtect(alloc_RawVec, (void*)&serialized_size); char *out_ = (char*)RAW(out); serialize_isotree(*model_outputs, out_); return out; } @@ -193,15 +191,283 @@ { return deserialize_cpp_obj<TreesIndexer>(src); } // [[Rcpp::export(rng = false)]] -Rcpp::LogicalVector check_null_ptr_model(SEXP ptr_model) +SEXP serialize_IsoForest_from_ptr(SEXP R_ptr) { + const IsoForest* model = (const IsoForest*)R_ExternalPtrAddr(R_ptr); + return serialize_cpp_obj<IsoForest>(model); +} + +// [[Rcpp::export(rng = false)]] +SEXP serialize_ExtIsoForest_from_ptr(SEXP R_ptr) +{ + const ExtIsoForest* model = (const ExtIsoForest*)R_ExternalPtrAddr(R_ptr); + return serialize_cpp_obj<ExtIsoForest>(model); +} + +// [[Rcpp::export(rng = false)]] +SEXP serialize_Imputer_from_ptr(SEXP R_ptr) +{ + const Imputer* model = (const Imputer*)R_ExternalPtrAddr(R_ptr); + return serialize_cpp_obj<Imputer>(model); +} + +// [[Rcpp::export(rng = false)]] +SEXP serialize_Indexer_from_ptr(SEXP R_ptr) +{ + const TreesIndexer* model = (const TreesIndexer*)R_ExternalPtrAddr(R_ptr); + return serialize_cpp_obj<TreesIndexer>(model); +} + +// [[Rcpp::export(rng = false)]] +Rcpp::LogicalVector check_null_ptr_model_internal(SEXP ptr_model) +{ return Rcpp::LogicalVector(R_ExternalPtrAddr(ptr_model) == NULL); } +static R_altrep_class_t altrepped_pointer_IsoForest; +static R_altrep_class_t altrepped_pointer_ExtIsoForest; +static R_altrep_class_t altrepped_pointer_Imputer; +static R_altrep_class_t altrepped_pointer_TreesIndexer; +static R_altrep_class_t altrepped_pointer_NullPointer; + +template <class Model> +R_altrep_class_t get_altrep_obj_class() +{ + if (std::is_same<Model, IsoForest>::value) return altrepped_pointer_IsoForest; + + if (std::is_same<Model, ExtIsoForest>::value) return altrepped_pointer_ExtIsoForest; + + if (std::is_same<Model, Imputer>::value) return altrepped_pointer_Imputer; + + if (std::is_same<Model, TreesIndexer>::value) return altrepped_pointer_TreesIndexer; + + throw Rcpp::exception("Internal error. Please open a bug report."); +} + +R_xlen_t altrepped_pointer_length(SEXP obj) +{ + return 1; +} + +SEXP get_element_from_altrepped_obj(SEXP R_altrepped_obj, R_xlen_t idx) +{ + return R_altrep_data1(R_altrepped_obj); +} + +template <class Model> +void delete_model_from_R_ptr(SEXP R_ptr) +{ + Model *cpp_ptr = (Model*)R_ExternalPtrAddr(R_ptr); + delete cpp_ptr; + R_SetExternalPtrAddr(R_ptr, nullptr); + R_ClearExternalPtr(R_ptr); +} + +template <class Model> +SEXP get_altrepped_pointer(void *void_ptr) +{ + SEXP R_ptr_name = PROTECT(Rf_mkString("ptr")); + SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle")); + SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); + SEXP out = PROTECT(R_new_altrep(get_altrep_obj_class<Model>(), R_NilValue, R_NilValue)); + + std::unique_ptr<Model> *ptr = (std::unique_ptr<Model>*)void_ptr; + R_SetExternalPtrAddr(R_ptr, ptr->get()); + R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr<Model>, TRUE); + ptr->release(); + + R_set_altrep_data1(out, R_ptr); + Rf_setAttrib(out, R_NamesSymbol, R_ptr_name); + Rf_setAttrib(out, R_ClassSymbol, R_ptr_class); + + UNPROTECT(4); + return out; +} + +template <class Model> +SEXP serialize_altrepped_pointer(SEXP altrepped_obj) +{ + try { + Model *cpp_ptr = (Model*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj)); + R_xlen_t state_size = determine_serialized_size(*cpp_ptr); + SEXP R_state = PROTECT(Rf_allocVector(RAWSXP, state_size)); + serialize_isotree(*cpp_ptr, (char*)RAW(R_state)); + UNPROTECT(1); + return R_state; + } + catch (const std::exception &ex) { + Rf_error("%s\n", ex.what()); + } + + return R_NilValue; /* <- won't be reached */ +} + +template <class Model> +SEXP deserialize_altrepped_pointer(SEXP cls, SEXP R_state) +{ + SEXP R_ptr_name = PROTECT(Rf_mkString("ptr")); + SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle")); + SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); + SEXP out = PROTECT(R_new_altrep(get_altrep_obj_class<Model>(), R_NilValue, R_NilValue)); + + try { + std::unique_ptr<Model> model(new Model()); + const char *inp = (const char*)RAW(R_state); + deserialize_isotree(*model, inp); + + R_SetExternalPtrAddr(R_ptr, model.get()); + R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr<Model>, TRUE); + model.release(); + } + catch (const std::exception &ex) { + Rf_error("%s\n", ex.what()); + } + + R_set_altrep_data1(out, R_ptr); + Rf_setAttrib(out, R_NamesSymbol, R_ptr_name); + Rf_setAttrib(out, R_ClassSymbol, R_ptr_class); + + UNPROTECT(4); + return out; +} + +template <class Model> +SEXP duplicate_altrepped_pointer(SEXP altrepped_obj, Rboolean deep) +{ + SEXP R_ptr_name = PROTECT(Rf_mkString("ptr")); + SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle")); + SEXP out = PROTECT(R_new_altrep(get_altrep_obj_class<Model>(), R_NilValue, R_NilValue)); + + if (!deep) { + R_set_altrep_data1(out, R_altrep_data1(altrepped_obj)); + } + + else { + + SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); + + try { + std::unique_ptr<Model> new_obj(new Model()); + Model *cpp_ptr = (Model*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj)); + *new_obj = *cpp_ptr; + + R_SetExternalPtrAddr(R_ptr, new_obj.get()); + R_RegisterCFinalizerEx(R_ptr, delete_model_from_R_ptr<Model>, TRUE); + new_obj.release(); + } + + catch (const std::exception &ex) { + Rf_error("%s\n", ex.what()); + } + + R_set_altrep_data1(out, R_ptr); + UNPROTECT(1); + } + + Rf_setAttrib(out, R_NamesSymbol, R_ptr_name); + Rf_setAttrib(out, R_NamesSymbol, R_ptr_class); + UNPROTECT(3); + return out; +} + +SEXP get_altrepped_null_pointer() +{ + SEXP R_ptr_name = PROTECT(Rf_mkString("ptr")); + SEXP R_ptr_class = PROTECT(Rf_mkString("isotree_altrepped_handle")); + SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); + SEXP out = PROTECT(R_new_altrep(altrepped_pointer_NullPointer, R_ptr, R_NilValue)); + Rf_setAttrib(out, R_NamesSymbol, R_ptr_name); + Rf_setAttrib(out, R_ClassSymbol, R_ptr_class); + UNPROTECT(4); + return out; +} + +SEXP safe_get_altrepped_null_pointer(void *unused) +{ + return get_altrepped_null_pointer(); +} + +SEXP serialize_altrepped_null(SEXP altrepped_obj) +{ + return Rf_allocVector(RAWSXP, 0); +} + +SEXP deserialize_altrepped_null(SEXP cls, SEXP R_state) +{ + return get_altrepped_null_pointer(); +} + +SEXP duplicate_altrepped_pointer(SEXP altrepped_obj, Rboolean deep) +{ + return get_altrepped_null_pointer(); +} + +Rboolean inspect_altrepped_pointer(SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int)) +{ + Rcpp::Rcout << "Altrepped pointer [address:" << R_ExternalPtrAddr(R_altrep_data1(x)) << "]\n"; + return TRUE; +} + +template <class Model> +Model* get_pointer_from_altrep(SEXP altrepped_obj) +{ + return (Model*)R_ExternalPtrAddr(R_altrep_data1(altrepped_obj)); +} + +template <class Model> +Model* get_pointer_from_xptr(SEXP R_ptr) +{ + return (Model*)R_ExternalPtrAddr(R_ptr); +} + +// [[Rcpp::init]] +void init_altrepped_vectors(DllInfo* dll) +{ + altrepped_pointer_IsoForest = R_make_altlist_class("altrepped_pointer_IsoForest", "isotree", dll); + R_set_altrep_Length_method(altrepped_pointer_IsoForest, altrepped_pointer_length); + R_set_altrep_Inspect_method(altrepped_pointer_IsoForest, inspect_altrepped_pointer); + R_set_altrep_Serialized_state_method(altrepped_pointer_IsoForest, serialize_altrepped_pointer<IsoForest>); + R_set_altrep_Unserialize_method(altrepped_pointer_IsoForest, deserialize_altrepped_pointer<IsoForest>); + R_set_altrep_Duplicate_method(altrepped_pointer_IsoForest, duplicate_altrepped_pointer<IsoForest>); + R_set_altlist_Elt_method(altrepped_pointer_IsoForest, get_element_from_altrepped_obj); + + altrepped_pointer_ExtIsoForest = R_make_altlist_class("altrepped_pointer_ExtIsoForest", "isotree", dll); + R_set_altrep_Length_method(altrepped_pointer_ExtIsoForest, altrepped_pointer_length); + R_set_altrep_Inspect_method(altrepped_pointer_ExtIsoForest, inspect_altrepped_pointer); + R_set_altrep_Serialized_state_method(altrepped_pointer_ExtIsoForest, serialize_altrepped_pointer<ExtIsoForest>); + R_set_altrep_Unserialize_method(altrepped_pointer_ExtIsoForest, deserialize_altrepped_pointer<ExtIsoForest>); + R_set_altrep_Duplicate_method(altrepped_pointer_ExtIsoForest, duplicate_altrepped_pointer<ExtIsoForest>); + R_set_altlist_Elt_method(altrepped_pointer_ExtIsoForest, get_element_from_altrepped_obj); + + altrepped_pointer_Imputer = R_make_altlist_class("altrepped_pointer_Imputer", "isotree", dll); + R_set_altrep_Length_method(altrepped_pointer_Imputer, altrepped_pointer_length); + R_set_altrep_Inspect_method(altrepped_pointer_Imputer, inspect_altrepped_pointer); + R_set_altrep_Serialized_state_method(altrepped_pointer_Imputer, serialize_altrepped_pointer<Imputer>); + R_set_altrep_Unserialize_method(altrepped_pointer_Imputer, deserialize_altrepped_pointer<Imputer>); + R_set_altrep_Duplicate_method(altrepped_pointer_Imputer, duplicate_altrepped_pointer<Imputer>); + R_set_altlist_Elt_method(altrepped_pointer_Imputer, get_element_from_altrepped_obj); + + altrepped_pointer_TreesIndexer = R_make_altlist_class("altrepped_pointer_TreesIndexer", "isotree", dll); + R_set_altrep_Length_method(altrepped_pointer_TreesIndexer, altrepped_pointer_length); + R_set_altrep_Inspect_method(altrepped_pointer_TreesIndexer, inspect_altrepped_pointer); + R_set_altrep_Serialized_state_method(altrepped_pointer_TreesIndexer, serialize_altrepped_pointer<TreesIndexer>); + R_set_altrep_Unserialize_method(altrepped_pointer_TreesIndexer, deserialize_altrepped_pointer<TreesIndexer>); + R_set_altrep_Duplicate_method(altrepped_pointer_TreesIndexer, duplicate_altrepped_pointer<TreesIndexer>); + R_set_altlist_Elt_method(altrepped_pointer_TreesIndexer, get_element_from_altrepped_obj); + + altrepped_pointer_NullPointer = R_make_altlist_class("altrepped_pointer_NullPointer", "isotree", dll); + R_set_altrep_Length_method(altrepped_pointer_NullPointer, altrepped_pointer_length); + R_set_altrep_Inspect_method(altrepped_pointer_NullPointer, inspect_altrepped_pointer); + R_set_altrep_Serialized_state_method(altrepped_pointer_NullPointer, serialize_altrepped_null); + R_set_altrep_Unserialize_method(altrepped_pointer_NullPointer, deserialize_altrepped_null); + R_set_altrep_Duplicate_method(altrepped_pointer_NullPointer, duplicate_altrepped_pointer); + R_set_altlist_Elt_method(altrepped_pointer_NullPointer, get_element_from_altrepped_obj); +} + double* set_R_nan_as_C_nan(double *x, size_t n, std::vector<double> &v, int nthreads) { v.assign(x, x + n); for (size_t i = 0; i < n; i++) if (unlikely(std::isnan(v[i]))) v[i] = NAN; @@ -221,10 +487,17 @@ for (size_t i = 0; i < n; i++) if (unlikely(std::isnan(x[i]))) x[i] = NAN; return x; } +TreesIndexer* get_indexer_ptr_from_R_obj(SEXP indexer_R_ptr) +{ + TreesIndexer *out = get_pointer_from_xptr<TreesIndexer>(indexer_R_ptr); + if (out && out->indices.empty()) out = nullptr; + return out; +} + // [[Rcpp::export(rng = false)]] Rcpp::List fit_model(Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp::IntegerVector ncat, Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr, Rcpp::NumericVector sample_weights, Rcpp::NumericVector col_weights, size_t nrows, size_t ncols_numeric, size_t ncols_categ, size_t ndim, size_t ntry, @@ -240,11 +513,11 @@ double prob_pick_col_by_kurt, double min_gain, Rcpp::CharacterVector cat_split_type, Rcpp::CharacterVector new_cat_action, Rcpp::CharacterVector missing_action, bool all_perm, bool build_imputer, bool output_imputations, size_t min_imp_obs, Rcpp::CharacterVector depth_imp, Rcpp::CharacterVector weigh_imp_rows, - int random_seed, bool use_long_double, int nthreads) + int random_seed, bool use_long_double, int nthreads, bool lazy_serialization) { double* numeric_data_ptr = NULL; int* categ_data_ptr = NULL; int* ncat_ptr = NULL; double* Xc_ptr = NULL; @@ -382,22 +655,41 @@ depths = Rcpp::NumericVector(nrows); depths_ptr = REAL(depths); } Rcpp::List outp = Rcpp::List::create( - Rcpp::_["depths"] = depths, - Rcpp::_["tmat"] = tmat, - Rcpp::_["dmat"] = dmat, - Rcpp::_["ptr"] = R_NilValue, - Rcpp::_["serialized"] = R_NilValue, - Rcpp::_["imp_ptr"] = R_NilValue, - Rcpp::_["imp_ser"] = R_NilValue, - Rcpp::_["imputed_num"] = R_NilValue, - Rcpp::_["imputed_cat"] = R_NilValue, - Rcpp::_["err"] = Rcpp::LogicalVector::create(1) + Rcpp::_["depths"] = depths, + Rcpp::_["tmat"] = tmat, + Rcpp::_["dmat"] = dmat, + Rcpp::_["model"] = R_NilValue, + Rcpp::_["imputer"] = R_NilValue, + Rcpp::_["indexer"] = R_NilValue, + Rcpp::_["imputed_num"] = R_NilValue, + Rcpp::_["imputed_cat"] = R_NilValue, + Rcpp::_["err"] = Rcpp::LogicalVector::create(1) ); + Rcpp::List model_lst_nonlazy = Rcpp::List::create( + Rcpp::_["ptr"] = R_NilValue, + Rcpp::_["ser"] = R_NilValue + ); + + Rcpp::List imputer_lst_nonlazy = Rcpp::List::create( + Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false), + Rcpp::_["ser"] = R_NilValue + ); + + if (lazy_serialization) { + outp["indexer"] = get_altrepped_null_pointer(); + } + else { + outp["indexer"] = Rcpp::List::create( + Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false), + Rcpp::_["ser"] = R_NilValue + ); + } + std::unique_ptr<IsoForest> model_ptr(nullptr); std::unique_ptr<ExtIsoForest> ext_model_ptr(nullptr); std::unique_ptr<Imputer> imputer_ptr(nullptr); if (ndim == 1) @@ -406,13 +698,11 @@ ext_model_ptr = std::unique_ptr<ExtIsoForest>(new ExtIsoForest()); if (build_imputer) imputer_ptr = std::unique_ptr<Imputer>(new Imputer()); - int ret_val; - try { - ret_val = + int ret_val = fit_iforest(model_ptr.get(), ext_model_ptr.get(), numeric_data_ptr, ncols_numeric, categ_data_ptr, ncols_categ, ncat_ptr, Xc_ptr, Xc_ind_ptr, Xc_indptr_ptr, ndim, ntry, coef_type_C, coef_by_prop, @@ -430,82 +720,92 @@ min_gain, missing_action_C, cat_split_type_C, new_cat_action_C, all_perm, imputer_ptr.get(), min_imp_obs, depth_imp_C, weigh_imp_rows_C, output_imputations, (uint64_t) random_seed, use_long_double, nthreads); - } - catch (std::bad_alloc &e) { - throw_mem_err(); - } - Rcpp::checkUserInterrupt(); + Rcpp::checkUserInterrupt(); /* <- nothing is returned in this case */ + /* Note to self: the procedure has its own interrupt checker, so when an interrupt + signal is triggered, first it will print a message about it, then re-issue the + signal, then check for interrupt through Rcpp's, which will return nothing to + the outside and will not raise any error. In this case, at least the user will + see the error message. Note that Rcpp's interrupt non-return, unlike R's, triggers + stack unwinding for C++ objects. */ + + /* Note to self: since the function for fitting the model uses the C++ exception system, + and the stop signals are translated into Rcpp stops, this section below should not + be reachable anyhow. */ if (ret_val == EXIT_FAILURE) { + Rcpp::Rcerr << "Unexpected error" << std::endl; return Rcpp::unwindProtect(safe_errlist, nullptr); } if (calc_dist && sq_dist) tmat_to_dense(tmat_ptr, dmat_ptr, nrows, standardize_dist? 0. : std::numeric_limits<double>::infinity()); bool serialization_failed = false; - Rcpp::RawVector serialized_obj; - try { + + if (lazy_serialization) + { + if (ndim == 1) { + outp["model"] = Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, (void*)&model_ptr); + } + else { + outp["model"] = Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, (void*)&ext_model_ptr); + } + + if (build_imputer) { + outp["imputer"] = Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, (void*)&imputer_ptr); + } + else { + outp["imputer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr); + } + } + + else + { + Rcpp::RawVector serialized_obj; + /* Note to self: the serialization functions use unwind protection internally. */ if (ndim == 1) serialized_obj = serialize_cpp_obj(model_ptr.get()); else serialized_obj = serialize_cpp_obj(ext_model_ptr.get()); - } - catch (std::bad_alloc &e) { - throw_mem_err(); - } - if (unlikely(!serialized_obj.size())) serialization_failed = true; - if (unlikely(serialization_failed)) { - if (ndim == 1) - model_ptr.reset(); - else - ext_model_ptr.reset(); - } - if (!serialization_failed) - { - outp["serialized"] = serialized_obj; + if (unlikely(!serialized_obj.size())) serialization_failed = true; + if (unlikely(serialization_failed)) { + throw Rcpp::exception("Error: insufficient memory\n"); + } + + model_lst_nonlazy["ser"] = serialized_obj; if (ndim == 1) { - outp["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model_ptr.get()); + model_lst_nonlazy["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model_ptr.get()); model_ptr.release(); } else { - outp["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, ext_model_ptr.get()); + model_lst_nonlazy["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, ext_model_ptr.get()); ext_model_ptr.release(); } - } else - outp["ptr"] = R_NilValue; - if (build_imputer && !serialization_failed) - { - try { - outp["imp_ser"] = serialize_cpp_obj(imputer_ptr.get()); - } - catch (std::bad_alloc &e) { - throw_mem_err(); - } - if (!Rf_xlength(outp["imp_ser"])) + outp["model"] = model_lst_nonlazy; + + if (build_imputer) { - serialization_failed = true; - imputer_ptr.reset(); - if (ndim == 1) - model_ptr.reset(); - else - ext_model_ptr.reset(); - outp["imp_ptr"] = R_NilValue; - outp["ptr"] = R_NilValue; - } else { - outp["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer_ptr.get()); + imputer_lst_nonlazy["ser"] = serialize_cpp_obj(imputer_ptr.get()); + if (!Rf_xlength(imputer_lst_nonlazy["ser"])) + { + throw Rcpp::exception("Error: insufficient memory\n"); + } + + imputer_lst_nonlazy["ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer_ptr.get()); imputer_ptr.release(); } + + outp["imputer"] = imputer_lst_nonlazy; } - if (output_imputations && !serialization_failed) + if (output_imputations) { outp["imputed_num"] = Xcpp; outp["imputed_cat"] = X_cat; } @@ -532,16 +832,17 @@ Rcpp::CharacterVector depth_imp, Rcpp::CharacterVector weigh_imp_rows, bool all_perm, Rcpp::NumericVector ref_X_num, Rcpp::IntegerVector ref_X_cat, Rcpp::NumericVector ref_Xc, Rcpp::IntegerVector ref_Xc_ind, Rcpp::IntegerVector ref_Xc_indptr, uint64_t random_seed, bool use_long_double, - Rcpp::List &model_cpp_obj_update, Rcpp::List &model_params_update) + Rcpp::List &model_cpp_obj_update, Rcpp::List &model_params_update, + bool is_altrepped) { Rcpp::List out = Rcpp::List::create( - Rcpp::_["serialized"] = R_NilValue, - Rcpp::_["imp_ser"] = R_NilValue, - Rcpp::_["ind_ser"] = R_NilValue + Rcpp::_["model_ser"] = R_NilValue, + Rcpp::_["imputer_ser"] = R_NilValue, + Rcpp::_["indexer_ser"] = R_NilValue ); Rcpp::IntegerVector ntrees_plus1 = Rcpp::IntegerVector::create(Rf_asInteger(model_params_update["ntrees"]) + 1); double* numeric_data_ptr = NULL; @@ -672,14 +973,11 @@ ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr)); if (build_imputer) imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr)); - if (!Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL) - indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr)); - if (indexer_ptr != NULL && indexer_ptr->indices.empty()) - indexer_ptr = NULL; + indexer_ptr = get_indexer_ptr_from_R_obj(indexer_R_ptr); size_t old_ntrees = (ndim == 1)? (model_ptr->trees.size()) : (ext_model_ptr->hplanes.size()); add_tree(model_ptr, ext_model_ptr, numeric_data_ptr, ncols_numeric, @@ -704,10 +1002,13 @@ ref_Xc_ptr, ref_Xc_ind_ptr, ref_Xc_indptr_ptr, (uint64_t)random_seed, use_long_double); Rcpp::RawVector new_serialized, new_imp_serialized, new_ind_serialized; size_t new_size; + + if (is_altrepped) goto dont_serialize; + try { if (ndim == 1) { if (serialized_obj.size() && @@ -717,21 +1018,21 @@ new_size = serialized_obj.size() + determine_serialized_size_additional_trees(*model_ptr, old_ntrees); new_serialized = resize_vec(serialized_obj, new_size); char *temp = (char*)RAW(new_serialized); incremental_serialize_isotree(*model_ptr, temp); - out["serialized"] = new_serialized; + out["model_ser"] = new_serialized; } catch (std::runtime_error &e) { goto serialize_anew_singlevar; } } else { serialize_anew_singlevar: - out["serialized"] = serialize_cpp_obj(model_ptr); + out["model_ser"] = serialize_cpp_obj(model_ptr); } } else { @@ -742,21 +1043,21 @@ new_size = serialized_obj.size() + determine_serialized_size_additional_trees(*ext_model_ptr, old_ntrees); new_serialized = resize_vec(serialized_obj, new_size); char *temp = (char*)RAW(new_serialized); incremental_serialize_isotree(*ext_model_ptr, temp); - out["serialized"] = new_serialized; + out["model_ser"] = new_serialized; } catch (std::runtime_error &e) { goto serialize_anew_ext; } } else { serialize_anew_ext: - out["serialized"] = serialize_cpp_obj(ext_model_ptr); + out["model_ser"] = serialize_cpp_obj(ext_model_ptr); } } if (imputer_ptr != NULL) { @@ -767,21 +1068,21 @@ new_size = serialized_imputer.size() + determine_serialized_size_additional_trees(*imputer_ptr, old_ntrees); new_imp_serialized = resize_vec(serialized_imputer, new_size); char *temp = (char*)RAW(new_imp_serialized); incremental_serialize_isotree(*imputer_ptr, temp); - out["imp_ser"] = new_imp_serialized; + out["imputer_ser"] = new_imp_serialized; } catch (std::runtime_error &e) { goto serialize_anew_imp; } } else { serialize_anew_imp: - out["imp_ser"] = serialize_cpp_obj(imputer_ptr); + out["imputer_ser"] = serialize_cpp_obj(imputer_ptr); } } if (indexer_ptr != NULL) { @@ -792,21 +1093,21 @@ new_size = serialized_indexer.size() + determine_serialized_size_additional_trees(*indexer_ptr, old_ntrees); new_ind_serialized = resize_vec(serialized_indexer, new_size); char *temp = (char*)RAW(new_ind_serialized); incremental_serialize_isotree(*indexer_ptr, temp); - out["ind_ser"] = new_ind_serialized; + out["indexer_ser"] = new_ind_serialized; } catch (std::runtime_error &e) { goto serialize_anew_ind; } } else { serialize_anew_ind: - out["ind_ser"] = serialize_cpp_obj(indexer_ptr); + out["indexer_ser"] = serialize_cpp_obj(indexer_ptr); } } } catch (...) @@ -820,15 +1121,31 @@ if (indexer_ptr != NULL) indexer_ptr->indices.resize(old_ntrees); throw; } - model_cpp_obj_update["serialized"] = out["serialized"]; - if (build_imputer) - model_cpp_obj_update["imp_ser"] = out["imp_ser"]; - if (indexer_ptr != NULL) - model_cpp_obj_update["ind_ser"] = out["ind_ser"]; + { + Rcpp::List model_lst = model_cpp_obj_update["model"]; + model_lst["ser"] = out["model_ser"]; + model_cpp_obj_update["model"] = model_lst; + + if (build_imputer) + { + Rcpp::List imputer_lst = model_cpp_obj_update["imputer"]; + imputer_lst["ser"] = out["imputer_ser"]; + model_cpp_obj_update["imputer"] = imputer_lst; + } + + if (indexer_ptr) + { + Rcpp::List indexer_lst = model_cpp_obj_update["indexer"]; + indexer_lst["ser"] = out["indexer_ser"]; + model_cpp_obj_update["indexer"] = indexer_lst; + } + } + + dont_serialize: model_params_update["ntrees"] = ntrees_plus1; } // [[Rcpp::export(rng = false)]] void predict_iso(SEXP model_R_ptr, bool is_extended, @@ -878,18 +1195,14 @@ int *tree_num_ptr = tree_num.size()? INTEGER(tree_num) : NULL; IsoForest* model_ptr = NULL; ExtIsoForest* ext_model_ptr = NULL; if (is_extended) - ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr)); + ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr); else - model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr)); - TreesIndexer* indexer = NULL; - if (!Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL) - indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr)); - if (indexer != NULL && indexer->indices.empty()) - indexer = NULL; + model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr); + TreesIndexer* indexer = get_indexer_ptr_from_R_obj(indexer_R_ptr); MissingAction missing_action = is_extended? ext_model_ptr->missing_action : model_ptr->missing_action; @@ -949,21 +1262,17 @@ double* dmat_ptr = (sq_dist & !n_from)? REAL(dmat) : NULL; double* rmat_ptr = n_from? REAL(rmat) : NULL; IsoForest* model_ptr = NULL; ExtIsoForest* ext_model_ptr = NULL; - TreesIndexer* indexer = NULL; + TreesIndexer* indexer = get_indexer_ptr_from_R_obj(indexer_R_ptr); if (is_extended) - ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr)); + ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr); else - model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr)); - if (!Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL) - indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr)); - if (indexer != NULL && (indexer->indices.empty() || (!as_kernel && indexer->indices.front().node_distances.empty()))) - indexer = NULL; + model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr); - if (use_reference_points && indexer != NULL && !indexer->indices.front().reference_points.empty()) { + if (use_reference_points && indexer && !indexer->indices.front().reference_points.empty()) { tmat_ptr = NULL; dmat_ptr = NULL; rmat_ptr = REAL(rmat); } else { @@ -1042,17 +1351,19 @@ if (Xr.size()) Xr_ptr = set_R_nan_as_C_nan(Xr_ptr, Xr.size(), nthreads); IsoForest* model_ptr = NULL; ExtIsoForest* ext_model_ptr = NULL; if (is_extended) - ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr)); + ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr); else - model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr)); + model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr); - Imputer* imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imputer_R_ptr)); + Imputer* imputer_ptr = get_pointer_from_xptr<Imputer>(imputer_R_ptr); + if (!imputer_ptr) throw Rcpp::exception("Error: requested missing value imputation, but model was built without imputer.\n"); + impute_missing_values(numeric_data_ptr, categ_data_ptr, true, Xr_ptr, Xr_ind_ptr, Xr_indptr_ptr, nrows, use_long_double, nthreads, model_ptr, ext_model_ptr, *imputer_ptr); @@ -1062,90 +1373,159 @@ Rcpp::_["X_cat"] = X_cat ); } // [[Rcpp::export(rng = false)]] -void drop_imputer(Rcpp::List lst_modify, Rcpp::List lst_modify2) +void drop_imputer(bool is_altrepped, bool free_cpp, + SEXP lst_imputer, Rcpp::List lst_cpp_objects, Rcpp::List lst_params) { - Rcpp::RawVector empty_ser = Rcpp::RawVector(); - Rcpp::LogicalVector FalseObj = Rcpp::LogicalVector::create(false); - Rcpp::XPtr<Imputer> imp_ptr = lst_modify["imp_ptr"]; - imp_ptr.release(); + SEXP FalseObj = PROTECT(Rf_ScalarLogical(0)); + SEXP blank_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); + SEXP altrepped_null = PROTECT(get_altrepped_null_pointer()); + + if (is_altrepped) { - lst_modify["imp_ser"] = empty_ser; - lst_modify2["build_imputer"] = FalseObj; + if (free_cpp) { + SEXP imp_R_ptr = R_altrep_data1(lst_imputer); + Imputer* imputer_ptr = (Imputer*)R_ExternalPtrAddr(imp_R_ptr); + delete imputer_ptr; + R_SetExternalPtrAddr(imp_R_ptr, nullptr); + R_ClearExternalPtr(imp_R_ptr); + } + + lst_cpp_objects["imputer"] = altrepped_null; + + } + + else { + + if (free_cpp) { + SEXP imp_R_ptr = VECTOR_ELT(lst_imputer, 0); + Imputer* imputer_ptr = get_pointer_from_xptr<Imputer>(imp_R_ptr); + delete imputer_ptr; + R_SetExternalPtrAddr(imp_R_ptr, nullptr); + R_ClearExternalPtr(imp_R_ptr); + SET_VECTOR_ELT(lst_imputer, 0, imp_R_ptr); + } + + SET_VECTOR_ELT(lst_imputer, 0, blank_ptr); + SET_VECTOR_ELT(lst_imputer, 1, R_NilValue); + } + + lst_params["build_imputer"] = FalseObj; + UNPROTECT(3); } // [[Rcpp::export(rng = false)]] -void drop_indexer(Rcpp::List lst_modify, Rcpp::List lst_modify2) +void drop_indexer(bool is_altrepped, bool free_cpp, + SEXP lst_indexer, Rcpp::List lst_cpp_objects, Rcpp::List lst_metadata) { - Rcpp::XPtr<TreesIndexer> empty_ptr = Rcpp::XPtr<TreesIndexer>(nullptr, false); - Rcpp::RawVector empty_ser = Rcpp::RawVector(); - Rcpp::CharacterVector empty_char = Rcpp::CharacterVector(); - Rcpp::XPtr<TreesIndexer> indexer = lst_modify["indexer"]; - indexer.release(); + SEXP empty_str = PROTECT(Rf_allocVector(STRSXP, 0)); + SEXP blank_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); + SEXP altrepped_null = PROTECT(get_altrepped_null_pointer()); + + if (is_altrepped) { - lst_modify["ind_ser"] = empty_ser; - lst_modify2["reference_names"] = empty_char; + if (free_cpp) { + SEXP ind_R_ptr = R_altrep_data1(lst_indexer); + TreesIndexer* indexer_ptr = (TreesIndexer*)R_ExternalPtrAddr(ind_R_ptr); + delete indexer_ptr; + R_SetExternalPtrAddr(ind_R_ptr, nullptr); + R_ClearExternalPtr(ind_R_ptr); + } + + lst_cpp_objects["indexer"] = altrepped_null; + } + + else { + + if (free_cpp) { + SEXP ind_R_ptr = VECTOR_ELT(lst_indexer, 0); + TreesIndexer* indexer_ptr = get_pointer_from_xptr<TreesIndexer>(ind_R_ptr); + delete indexer_ptr; + R_SetExternalPtrAddr(ind_R_ptr, nullptr); + R_ClearExternalPtr(ind_R_ptr); + SET_VECTOR_ELT(lst_indexer, 0, ind_R_ptr); + } + + SET_VECTOR_ELT(lst_indexer, 0, blank_ptr); + SET_VECTOR_ELT(lst_indexer, 1, R_NilValue); + } + + lst_metadata["reference_names"] = empty_str; + UNPROTECT(3); } // [[Rcpp::export(rng = false)]] -void drop_reference_points(Rcpp::List lst_modify, Rcpp::List lst_modify2) +void drop_reference_points(bool is_altrepped, SEXP lst_indexer, Rcpp::List lst_cpp_objects, Rcpp::List lst_metadata) { - Rcpp::CharacterVector empty_char = Rcpp::CharacterVector(); - Rcpp::RawVector empty_ser = Rcpp::RawVector(); - Rcpp::XPtr<TreesIndexer> indexer_R_ptr = lst_modify["indexer"]; - TreesIndexer *indexer_ptr = indexer_R_ptr.get(); - if (indexer_ptr == NULL) { - lst_modify["ind_ser"] = empty_ser; - lst_modify2["reference_names"] = empty_char; - return; + SEXP empty_str = PROTECT(Rf_allocVector(STRSXP, 0)); + + if (is_altrepped) + { + SEXP ind_R_ptr = R_altrep_data1(lst_indexer); + TreesIndexer* indexer_ptr = (TreesIndexer*)R_ExternalPtrAddr(ind_R_ptr); + if (!indexer_ptr) return; + + for (auto &tree : indexer_ptr->indices) + { + tree.reference_points.clear(); + tree.reference_indptr.clear(); + tree.reference_mapping.clear(); + } } - if (indexer_ptr->indices.empty()) { - indexer_R_ptr.release(); - lst_modify["ind_ser"] = empty_ser; - lst_modify2["reference_names"] = empty_char; - return; - } - if (indexer_ptr->indices.front().reference_points.empty()) { - lst_modify2["reference_names"] = empty_char; - return; - } - std::unique_ptr<TreesIndexer> new_indexer(new TreesIndexer(*indexer_ptr)); - for (auto &tree : new_indexer->indices) + else { - tree.reference_points.clear(); - tree.reference_indptr.clear(); - tree.reference_mapping.clear(); + SEXP ind_R_ptr = VECTOR_ELT(lst_indexer, 0); + TreesIndexer* indexer_ptr = get_pointer_from_xptr<TreesIndexer>(ind_R_ptr); + if (!indexer_ptr) return; + + std::unique_ptr<TreesIndexer> new_indexer(new TreesIndexer(*indexer_ptr)); + for (auto &tree : new_indexer->indices) + { + tree.reference_points.clear(); + tree.reference_indptr.clear(); + tree.reference_mapping.clear(); + } + + SET_VECTOR_ELT(lst_indexer, 1, serialize_cpp_obj(new_indexer.get())); + *indexer_ptr = std::move(*new_indexer); + new_indexer.release(); } - Rcpp::RawVector ind_ser = serialize_cpp_obj(new_indexer.get()); - *indexer_ptr = std::move(*new_indexer); - new_indexer.release(); - lst_modify["ind_ser"] = ind_ser; - lst_modify2["reference_names"] = empty_char; + + lst_metadata["reference_names"] = empty_str; + UNPROTECT(1); } // [[Rcpp::export(rng = false)]] Rcpp::List subset_trees ( SEXP model_R_ptr, SEXP imputer_R_ptr, SEXP indexer_R_ptr, - bool is_extended, bool has_imputer, + bool is_extended, bool is_altrepped, Rcpp::IntegerVector trees_take ) { - bool has_indexer = !Rf_isNull(indexer_R_ptr) && R_ExternalPtrAddr(indexer_R_ptr) != NULL; - Rcpp::List out = Rcpp::List::create( + Rcpp::_["model"] = R_NilValue, + Rcpp::_["imputer"] = R_NilValue, + Rcpp::_["indexer"] = R_NilValue + ); + Rcpp::List lst_model = Rcpp::List::create( Rcpp::_["ptr"] = R_NilValue, - Rcpp::_["serialized"] = R_NilValue, - Rcpp::_["imp_ptr"] = R_NilValue, - Rcpp::_["imp_ser"] = R_NilValue, - Rcpp::_["indexer"] = R_NilValue, - Rcpp::_["ind_ser"] = R_NilValue + Rcpp::_["ser"] = R_NilValue ); + Rcpp::List lst_imputer = Rcpp::List::create( + Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false), + Rcpp::_["ser"] = R_NilValue + ); + Rcpp::List lst_indexer = Rcpp::List::create( + Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false), + Rcpp::_["ser"] = R_NilValue + ); + IsoForest* model_ptr = NULL; ExtIsoForest* ext_model_ptr = NULL; Imputer* imputer_ptr = NULL; TreesIndexer* indexer_ptr = NULL; std::unique_ptr<IsoForest> new_model_ptr(nullptr); @@ -1159,20 +1539,19 @@ } else { model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr)); new_model_ptr = std::unique_ptr<IsoForest>(new IsoForest()); } - - if (has_imputer) { - imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imputer_R_ptr)); - new_imputer_ptr = std::unique_ptr<Imputer>(new Imputer()); + imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imputer_R_ptr)); + if (imputer_ptr) { + new_imputer_ptr = std::unique_ptr<Imputer>(new Imputer()); } - if (has_indexer) { - indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr)); - new_indexer_ptr = std::unique_ptr<TreesIndexer>(new TreesIndexer()); + indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr)); + if (indexer_ptr) { + new_indexer_ptr = std::unique_ptr<TreesIndexer>(new TreesIndexer()); } std::unique_ptr<size_t[]> trees_take_(new size_t[trees_take.size()]); for (decltype(trees_take.size()) ix = 0; ix < trees_take.size(); ix++) trees_take_[ix] = (size_t)(trees_take[ix] - 1); @@ -1182,35 +1561,53 @@ imputer_ptr, new_imputer_ptr.get(), indexer_ptr, new_indexer_ptr.get(), trees_take_.get(), trees_take.size()); trees_take_.reset(); - if (!is_extended) - out["serialized"] = serialize_cpp_obj(new_model_ptr.get()); + if (is_altrepped) + { + out["model"] = is_extended? + Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, (void*)&new_ext_model_ptr) + : + Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, (void*)&new_model_ptr); + out["imputer"] = imputer_ptr? + Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, (void*)&new_imputer_ptr) + : + Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr); + out["indexer"] = indexer_ptr? + Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, (void*)&new_indexer_ptr) + : + Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr); + } + else - out["serialized"] = serialize_cpp_obj(new_ext_model_ptr.get()); - if (has_imputer) - out["imp_ser"] = serialize_cpp_obj(new_imputer_ptr.get()); - if (has_indexer) - out["ind_ser"] = serialize_cpp_obj(new_indexer_ptr.get()); + { + lst_model["ser"] = is_extended? serialize_cpp_obj(new_ext_model_ptr.get()) : serialize_cpp_obj(new_model_ptr.get()); + if (imputer_ptr) lst_imputer["ser"] = serialize_cpp_obj(new_imputer_ptr.get()); + if (indexer_ptr) lst_indexer["ser"] = serialize_cpp_obj(new_indexer_ptr.get()); - if (!is_extended) { - out["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, new_model_ptr.get()); + lst_model["ptr"] = is_extended? + Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, new_ext_model_ptr.get()) + : + Rcpp::unwindProtect(safe_XPtr<IsoForest>, new_model_ptr.get()); new_model_ptr.release(); + + if (imputer_ptr) { + lst_imputer["ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, new_imputer_ptr.get()); + new_imputer_ptr.release(); + } + + if (indexer_ptr) { + lst_indexer["ptr"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, new_indexer_ptr.get()); + new_indexer_ptr.release(); + } + + out["model"] = lst_model; + out["imputer"] = lst_imputer; + out["indexer"] = lst_indexer; } - else { - out["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, new_ext_model_ptr.get()); - new_ext_model_ptr.release(); - } - if (has_imputer) { - out["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, new_imputer_ptr.get()); - new_imputer_ptr.release(); - } - if (has_indexer) { - out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, new_indexer_ptr.get()); - new_indexer_ptr.release(); - } + return out; } // [[Rcpp::export(rng = false)]] void inplace_set_to_zero(SEXP obj) @@ -1249,16 +1646,16 @@ size_t ntrees; IsoForest* model_ptr = NULL; ExtIsoForest* ext_model_ptr = NULL; if (is_extended) { - ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr)); + ext_model_ptr = get_pointer_from_xptr<ExtIsoForest>(model_R_ptr); ntrees = ext_model_ptr->hplanes.size(); } else { - model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr)); + model_ptr = get_pointer_from_xptr<IsoForest>(model_R_ptr); ntrees = model_ptr->trees.size(); } Rcpp::IntegerVector n_nodes(ntrees); Rcpp::IntegerVector n_terminal(ntrees); @@ -1280,76 +1677,66 @@ bool is_extended, Rcpp::RawVector serialized_obj, Rcpp::RawVector serialized_imputer, Rcpp::RawVector serialized_indexer, Rcpp::List &model_cpp_obj_update, - Rcpp::List &model_params_update) + Rcpp::List &model_params_update, + bool is_altrepped) { - if ((!Rf_isNull(imp_R_ptr) && R_ExternalPtrAddr(imp_R_ptr) != NULL) - && - !(!Rf_isNull(oimp_R_ptr) && R_ExternalPtrAddr(oimp_R_ptr) != NULL)) - { - Rcpp::stop("Model to append trees to has imputer, but model to append from doesn't. Try dropping the imputer.\n"); - } - if ((!Rf_isNull(ind_R_ptr) && R_ExternalPtrAddr(ind_R_ptr) != NULL) - && - !(!Rf_isNull(oind_R_ptr) && R_ExternalPtrAddr(oind_R_ptr) != NULL)) - { - Rcpp::stop("Model to append trees to has indexer, but model to append from doesn't. Try dropping the indexer.\n"); - } - Rcpp::List out = Rcpp::List::create( - Rcpp::_["serialized"] = R_NilValue, - Rcpp::_["imp_ser"] = R_NilValue, - Rcpp::_["ind_ser"] = R_NilValue + Rcpp::_["model_ser"] = R_NilValue, + Rcpp::_["imputer_ser"] = R_NilValue, + Rcpp::_["indexer_ser"] = R_NilValue ); Rcpp::IntegerVector ntrees_new = Rcpp::IntegerVector::create(Rf_asInteger(model_params_update["ntrees"])); - IsoForest* model_ptr = NULL; - IsoForest* other_ptr = NULL; - ExtIsoForest* ext_model_ptr = NULL; - ExtIsoForest* ext_other_ptr = NULL; - Imputer* imputer_ptr = NULL; - Imputer* oimputer_ptr = NULL; - TreesIndexer* indexer_ptr = NULL; - TreesIndexer* oindexer_ptr = NULL; + IsoForest* model_ptr = nullptr; + IsoForest* other_ptr = nullptr; + ExtIsoForest* ext_model_ptr = nullptr; + ExtIsoForest* ext_other_ptr = nullptr; + Imputer* imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr)); + Imputer* oimputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(oimp_R_ptr)); + TreesIndexer* indexer_ptr = get_indexer_ptr_from_R_obj(ind_R_ptr); + TreesIndexer* oindexer_ptr = get_indexer_ptr_from_R_obj(oind_R_ptr); size_t old_ntrees; if (is_extended) { ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr)); ext_other_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(other_R_ptr)); old_ntrees = ext_model_ptr->hplanes.size(); + if (ext_model_ptr == ext_other_ptr) { + throw Rcpp::exception("Error: attempting to append trees from one model to itself."); + } } else { model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr)); other_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(other_R_ptr)); old_ntrees = model_ptr->trees.size(); + if (model_ptr == other_ptr) { + throw Rcpp::exception("Error: attempting to append trees from one model to itself."); + } } - if (!Rf_isNull(imp_R_ptr) && !Rf_isNull(oimp_R_ptr) && - R_ExternalPtrAddr(imp_R_ptr) != NULL && - R_ExternalPtrAddr(oimp_R_ptr) != NULL) - { - imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr)); - oimputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(oimp_R_ptr)); + if (imputer_ptr && !oimputer_ptr) { + throw Rcpp::exception("Model to append trees to has imputer, but model to append from doesn't. Try dropping the imputer.\n"); } - if (!Rf_isNull(ind_R_ptr) && !Rf_isNull(oind_R_ptr) && - R_ExternalPtrAddr(ind_R_ptr) != NULL && - R_ExternalPtrAddr(oind_R_ptr) != NULL) - { - indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(ind_R_ptr)); - oindexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(oind_R_ptr)); + if (indexer_ptr && !oindexer_ptr) { + throw Rcpp::exception("Model to append trees to has indexer, but model to append from doesn't. Try dropping the indexer.\n"); } + merge_models(model_ptr, other_ptr, ext_model_ptr, ext_other_ptr, imputer_ptr, oimputer_ptr, indexer_ptr, oindexer_ptr); Rcpp::RawVector new_serialized, new_imp_serialized, new_ind_serialized; size_t new_size; + + if (is_altrepped) goto dont_serialize; + try { if (!is_extended) { if (serialized_obj.size() && @@ -1359,21 +1746,21 @@ new_size = serialized_obj.size() + determine_serialized_size_additional_trees(*model_ptr, old_ntrees); new_serialized = resize_vec(serialized_obj, new_size); char *temp = (char*)RAW(new_serialized); incremental_serialize_isotree(*model_ptr, temp); - out["serialized"] = new_serialized; + out["model_ser"] = new_serialized; } catch (std::runtime_error &e) { goto serialize_anew_singlevar; } } else { serialize_anew_singlevar: - out["serialized"] = serialize_cpp_obj(model_ptr); + out["model_ser"] = serialize_cpp_obj(model_ptr); } } else { @@ -1384,71 +1771,71 @@ new_size = serialized_obj.size() + determine_serialized_size_additional_trees(*ext_model_ptr, old_ntrees); new_serialized = resize_vec(serialized_obj, new_size); char *temp = (char*)RAW(new_serialized); incremental_serialize_isotree(*ext_model_ptr, temp); - out["serialized"] = new_serialized; + out["model_ser"] = new_serialized; } catch (std::runtime_error &e) { goto serialize_anew_ext; } } else { serialize_anew_ext: - out["serialized"] = serialize_cpp_obj(ext_model_ptr); + out["model_ser"] = serialize_cpp_obj(ext_model_ptr); } } - if (imputer_ptr != NULL) + if (imputer_ptr) { if (serialized_imputer.size() && check_can_undergo_incremental_serialization(*imputer_ptr, (char*)RAW(serialized_imputer))) { try { new_size = serialized_obj.size() + determine_serialized_size_additional_trees(*imputer_ptr, old_ntrees); new_imp_serialized = resize_vec(serialized_imputer, new_size); char *temp = (char*)RAW(new_imp_serialized); incremental_serialize_isotree(*imputer_ptr, temp); - out["imp_ser"] = new_imp_serialized; + out["imputer_ser"] = new_imp_serialized; } catch (std::runtime_error &e) { goto serialize_anew_imp; } } else { serialize_anew_imp: - out["imp_ser"] = serialize_cpp_obj(imputer_ptr); + out["imputer_ser"] = serialize_cpp_obj(imputer_ptr); } } - if (indexer_ptr != NULL) + if (indexer_ptr) { if (serialized_indexer.size() && check_can_undergo_incremental_serialization(*indexer_ptr, (char*)RAW(serialized_indexer))) { try { new_size = serialized_obj.size() + determine_serialized_size_additional_trees(*indexer_ptr, old_ntrees); new_ind_serialized = resize_vec(serialized_indexer, new_size); char *temp = (char*)RAW(new_ind_serialized); incremental_serialize_isotree(*indexer_ptr, temp); - out["ind_ser"] = new_ind_serialized; + out["indexer_ser"] = new_ind_serialized; } catch (std::runtime_error &e) { goto serialize_anew_ind; } } else { serialize_anew_ind: - out["ind_ser"] = serialize_cpp_obj(indexer_ptr); + out["indexer_ser"] = serialize_cpp_obj(indexer_ptr); } } } catch (...) @@ -1456,22 +1843,38 @@ if (!is_extended) model_ptr->trees.resize(old_ntrees); else ext_model_ptr->hplanes.resize(old_ntrees); - if (imputer_ptr != NULL) + if (imputer_ptr) imputer_ptr->imputer_tree.resize(old_ntrees); - if (indexer_ptr != NULL) + if (indexer_ptr) indexer_ptr->indices.resize(old_ntrees); throw; } - model_cpp_obj_update["serialized"] = out["serialized"]; - if (imputer_ptr) - model_cpp_obj_update["imp_ser"] = out["imp_ser"]; - if (indexer_ptr) - model_cpp_obj_update["ind_ser"] = out["ind_ser"]; + { + Rcpp::List model_lst = model_cpp_obj_update["model"]; + model_lst["ser"] = out["model_ser"]; + model_cpp_obj_update["model"] = model_lst; + + if (imputer_ptr) + { + Rcpp::List imputer_lst = model_cpp_obj_update["imputer"]; + imputer_lst["ser"] = out["imputer_ser"]; + model_cpp_obj_update["imputer"] = imputer_lst; + } + + if (indexer_ptr) + { + Rcpp::List indexer_lst = model_cpp_obj_update["indexer"]; + indexer_lst["ser"] = out["indexer_ser"]; + model_cpp_obj_update["indexer"] = indexer_lst; + } + } + + dont_serialize: *(INTEGER(ntrees_new)) = is_extended? ext_model_ptr->hplanes.size() : model_ptr->trees.size(); model_params_update["ntrees"] = ntrees_new; } SEXP alloc_List(void *data) @@ -1549,116 +1952,145 @@ That way, it would avoid an extra copy of the data */ return Rcpp::unwindProtect(safe_CastString, &out); } // [[Rcpp::export(rng = false)]] -Rcpp::List copy_cpp_objects(SEXP model_R_ptr, bool is_extended, SEXP imp_R_ptr, bool has_imputer, SEXP ind_R_ptr) +Rcpp::List copy_cpp_objects(SEXP model_R_ptr, bool is_extended, SEXP imp_R_ptr, SEXP ind_R_ptr, bool lazy_serialization) { - bool has_indexer = !Rf_isNull(ind_R_ptr) && R_ExternalPtrAddr(ind_R_ptr) != NULL; - Rcpp::List out = Rcpp::List::create( - Rcpp::_["ptr"] = R_NilValue, - Rcpp::_["imp_ptr"] = R_NilValue, - Rcpp::_["indexer"] = R_NilValue + Rcpp::_["model"] = Rcpp::XPtr<void*>(nullptr, false), + Rcpp::_["imputer"] = Rcpp::XPtr<void*>(nullptr, false), + Rcpp::_["indexer"] = Rcpp::XPtr<void*>(nullptr, false) ); IsoForest* model_ptr = NULL; ExtIsoForest* ext_model_ptr = NULL; Imputer* imputer_ptr = NULL; TreesIndexer* indexer_ptr = NULL; if (is_extended) ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr)); else model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr)); - if (has_imputer) + if (R_ExternalPtrAddr(imp_R_ptr)) imputer_ptr = static_cast<Imputer*>(R_ExternalPtrAddr(imp_R_ptr)); - if (has_indexer) + if (R_ExternalPtrAddr(ind_R_ptr)) indexer_ptr = static_cast<TreesIndexer*>(R_ExternalPtrAddr(ind_R_ptr)); std::unique_ptr<IsoForest> copy_model(new IsoForest()); std::unique_ptr<ExtIsoForest> copy_ext_model(new ExtIsoForest()); std::unique_ptr<Imputer> copy_imputer(new Imputer()); std::unique_ptr<TreesIndexer> copy_indexer(new TreesIndexer()); - if (model_ptr != NULL) + if (model_ptr) *copy_model = *model_ptr; - if (ext_model_ptr != NULL) + if (ext_model_ptr) *copy_ext_model = *ext_model_ptr; - if (imputer_ptr != NULL) + if (imputer_ptr) *copy_imputer = *imputer_ptr; - if (indexer_ptr != NULL) + if (indexer_ptr) *copy_indexer = *indexer_ptr; - if (is_extended) { - out["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, copy_ext_model.get()); - copy_ext_model.release(); + if (lazy_serialization) + { + if (is_extended) { + out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, (void*)&copy_ext_model); + } + else { + out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, (void*)&copy_model); + } + + if (imputer_ptr) { + out["imputer"] = Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, (void*)&copy_imputer); + } + else { + out["imputer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr); + } + + if (indexer_ptr) { + out["indexer"] = Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, (void*)&copy_indexer); + } + else { + out["indexer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr); + } } - else { - out["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, copy_model.get()); - copy_model.release(); + + else + { + if (is_extended) { + out["model"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, copy_ext_model.get()); + copy_ext_model.release(); + } + else { + out["model"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, copy_model.get()); + copy_model.release(); + } + if (imputer_ptr) { + out["imputer"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, copy_imputer.get()); + copy_imputer.release(); + } + if (indexer_ptr) { + out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, copy_indexer.get()); + copy_indexer.release(); + } } - if (has_imputer) { - out["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, copy_imputer.get()); - copy_imputer.release(); - } - if (has_indexer) { - out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, copy_indexer.get()); - copy_indexer.release(); - } + return out; } // [[Rcpp::export(rng = false)]] -void build_tree_indices(Rcpp::List lst_modify, bool is_extended, bool with_distances, int nthreads) +void build_tree_indices(Rcpp::List lst_cpp_objects, SEXP ptr_model, bool is_altrepped, bool is_extended, bool with_distances, int nthreads) { - Rcpp::RawVector ind_ser = Rcpp::RawVector(); - Rcpp::List empty_lst = Rcpp::List::create(Rcpp::_["indexer"] = R_NilValue); + Rcpp::List lst_out = Rcpp::List::create( + Rcpp::_["ptr"] = R_NilValue, + Rcpp::_["ser"] = R_NilValue + ); std::unique_ptr<TreesIndexer> indexer(new TreesIndexer()); if (!is_extended) { build_tree_indices(*indexer, - *static_cast<IsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"])), + *static_cast<IsoForest*>(R_ExternalPtrAddr(ptr_model)), nthreads, with_distances); } else { build_tree_indices(*indexer, - *static_cast<ExtIsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"])), + *static_cast<ExtIsoForest*>(R_ExternalPtrAddr(ptr_model)), nthreads, with_distances); } - ind_ser = serialize_cpp_obj(indexer.get()); - empty_lst["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get()); - if (!Rf_isNull(lst_modify["indexer"])) { - Rcpp::XPtr<TreesIndexer> indexer_R_ptr = lst_modify["indexer"]; - indexer_R_ptr.release(); + if (is_altrepped) { + lst_cpp_objects["indexer"] = Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, (void*)&indexer); } - - lst_modify["ind_ser"] = ind_ser; - lst_modify["indexer"] = empty_lst["indexer"]; - indexer.release(); + + else { + lst_out["ser"] = serialize_cpp_obj(indexer.get()); + lst_out["ptr"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get()); + indexer.release(); + lst_cpp_objects["indexer"] = lst_out; + } } // [[Rcpp::export(rng = false)]] bool check_node_indexer_has_distances(SEXP indexer_R_ptr) { - if (Rf_isNull(indexer_R_ptr) || R_ExternalPtrAddr(indexer_R_ptr) == NULL) - return false; - TreesIndexer *indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr)); - if (indexer->indices.empty()) return false; + const TreesIndexer *indexer = (const TreesIndexer*)R_ExternalPtrAddr(indexer_R_ptr); + if (!indexer) return false; return !indexer->indices.front().node_distances.empty(); } // [[Rcpp::export(rng = false)]] -void set_reference_points(Rcpp::List lst_modify, Rcpp::List lst_modify2, SEXP rnames, bool is_extended, +void set_reference_points(Rcpp::List lst_cpp_objects, SEXP ptr_model, SEXP ind_R_ptr, bool is_altrepped, + Rcpp::List lst_metadata, SEXP rnames, bool is_extended, Rcpp::NumericVector X_num, Rcpp::IntegerVector X_cat, Rcpp::NumericVector Xc, Rcpp::IntegerVector Xc_ind, Rcpp::IntegerVector Xc_indptr, size_t nrows, int nthreads, bool with_distances) { - Rcpp::RawVector ind_ser = Rcpp::RawVector(); - Rcpp::XPtr<TreesIndexer> indexer_R_ptr = lst_modify["indexer"]; + Rcpp::List lst_out = Rcpp::List::create( + Rcpp::_["ptr"] = R_NilValue, + Rcpp::_["ser"] = R_NilValue + ); double* numeric_data_ptr = NULL; int* categ_data_ptr = NULL; double* Xc_ptr = NULL; int* Xc_ind_ptr = NULL; @@ -1680,18 +2112,17 @@ Xc_ptr = REAL(Xc); Xc_ind_ptr = INTEGER(Xc_ind); Xc_indptr_ptr = INTEGER(Xc_indptr); } - IsoForest* model_ptr = NULL; - ExtIsoForest* ext_model_ptr = NULL; - TreesIndexer* indexer = NULL; + IsoForest* model_ptr = nullptr; + ExtIsoForest* ext_model_ptr = nullptr; + TreesIndexer* indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(ind_R_ptr)); if (is_extended) - ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"])); + ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(ptr_model)); else - model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(lst_modify["ptr"])); - indexer = indexer_R_ptr.get(); + model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(ptr_model)); MissingAction missing_action = is_extended? ext_model_ptr->missing_action : model_ptr->missing_action; @@ -1699,53 +2130,64 @@ { if (X_num.size()) numeric_data_ptr = set_R_nan_as_C_nan(numeric_data_ptr, X_num.size(), Xcpp, nthreads); if (Xc.size()) Xc_ptr = set_R_nan_as_C_nan(Xc_ptr, Xc.size(), Xcpp, nthreads); } - std::unique_ptr<TreesIndexer> new_indexer(new TreesIndexer(*indexer)); + std::unique_ptr<TreesIndexer> new_indexer(is_altrepped? nullptr : (new TreesIndexer(*indexer))); + TreesIndexer *indexer_use = is_altrepped? indexer : new_indexer.get(); - set_reference_points(model_ptr, ext_model_ptr, new_indexer.get(), + /* Note: if using an altrepped pointer, the indexer is modified in-place. If that fails, + it will end up overwitten, with the previous references taken away. OTOH, if using + a pointer + serialized, and it fails, it should not overwrite anything, and thus + should not re-assign here immediately. */ + if (is_altrepped) { + lst_metadata["reference_names"] = rnames; + } + + set_reference_points(model_ptr, ext_model_ptr, indexer_use, with_distances, numeric_data_ptr, categ_data_ptr, true, (size_t)0, (size_t)0, Xc_ptr, Xc_ind_ptr, Xc_indptr_ptr, (double*)NULL, (int*)NULL, (int*)NULL, nrows, nthreads); - ind_ser = serialize_cpp_obj(new_indexer.get()); - *indexer = std::move(*new_indexer); - new_indexer.release(); - lst_modify["ind_ser"] = ind_ser; - lst_modify2["reference_names"] = rnames; + if (!is_altrepped) { + lst_out["ser"] = serialize_cpp_obj(new_indexer.get()); + *indexer = std::move(*new_indexer); + lst_metadata["reference_names"] = rnames; + } } // [[Rcpp::export(rng = false)]] bool check_node_indexer_has_references(SEXP indexer_R_ptr) { - if (Rf_isNull(indexer_R_ptr) || R_ExternalPtrAddr(indexer_R_ptr) == NULL) - return false; - TreesIndexer *indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr)); - if (indexer->indices.empty()) - return false; - if (indexer->indices.front().reference_points.empty()) - return false; - else - return true; + const TreesIndexer *indexer = (const TreesIndexer*)R_ExternalPtrAddr(indexer_R_ptr); + if (!indexer) return false; + return !(indexer->indices.front().reference_points.empty()); } // [[Rcpp::export(rng = false)]] int get_num_references(SEXP indexer_R_ptr) { - TreesIndexer *indexer = static_cast<TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr)); - if (indexer == NULL || indexer->indices.empty()) return 0; + const TreesIndexer *indexer = static_cast<const TreesIndexer*>(R_ExternalPtrAddr(indexer_R_ptr)); + if (!indexer || indexer->indices.empty()) return 0; return indexer->indices.front().reference_points.size(); } // [[Rcpp::export(rng = false)]] -SEXP get_null_R_pointer() +SEXP get_null_R_pointer_internal(bool altrepped) { - return R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue); + if (!altrepped) { + return R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue); + } + else { + SEXP R_ptr = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); + SEXP out = PROTECT(R_new_altrep(altrepped_pointer_NullPointer, R_ptr, R_NilValue)); + UNPROTECT(2); + return out; + } } /* This library will use different code paths for opening a file path in order to support non-ASCII characters, depending on compiler and platform support. */ @@ -1850,22 +2292,34 @@ output_file ); } // [[Rcpp::export]] -Rcpp::List deserialize_from_file(Rcpp::CharacterVector fname) +Rcpp::List deserialize_from_file(Rcpp::CharacterVector fname, bool lazy_serialization) { Rcpp::List out = Rcpp::List::create( - Rcpp::_["ptr"] = R_NilValue, - Rcpp::_["serialized"] = R_NilValue, - Rcpp::_["imp_ptr"] = R_NilValue, - Rcpp::_["imp_ser"] = R_NilValue, + Rcpp::_["model"] = R_NilValue, + Rcpp::_["imputer"] = R_NilValue, Rcpp::_["indexer"] = R_NilValue, - Rcpp::_["ind_ser"] = R_NilValue, Rcpp::_["metadata"] = R_NilValue ); + if (!lazy_serialization) { + out["model"] = Rcpp::List::create( + Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false), + Rcpp::_["ser"] = R_NilValue + ); + out["imputer"] = Rcpp::List::create( + Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false), + Rcpp::_["ser"] = R_NilValue + ); + out["indexer"] = Rcpp::List::create( + Rcpp::_["ptr"] = Rcpp::XPtr<void*>(nullptr, false), + Rcpp::_["ser"] = R_NilValue + ); + } + FileOpener file_(fname[0], "rb"); FILE *input_file = file_.get_handle(); bool is_isotree_model; bool is_compatible; @@ -1926,35 +2380,66 @@ ptr_imputer, ptr_indexer, ptr_metadata ); - if (has_IsoForest) - out["serialized"] = serialize_cpp_obj(model.get()); + if (lazy_serialization) + { + if (has_IsoForest) + out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<IsoForest>, &model); + else + out["model"] = Rcpp::unwindProtect(get_altrepped_pointer<ExtIsoForest>, &model_ext); + + if (has_Imputer) + out["imputer"] = Rcpp::unwindProtect(get_altrepped_pointer<Imputer>, &imputer); + else + out["imputer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr); + + if (has_Imputer) + out["indexer"] = Rcpp::unwindProtect(get_altrepped_pointer<TreesIndexer>, &indexer); + else + out["indexer"] = Rcpp::unwindProtect(safe_get_altrepped_null_pointer, nullptr); + } + else - out["serialized"] = serialize_cpp_obj(model_ext.get()); - if (has_Imputer) - out["imp_ser"] = serialize_cpp_obj(imputer.get()); - if (has_Indexer) - out["ind_ser"] = serialize_cpp_obj(indexer.get()); + { + Rcpp::List tmp_model = out["model"]; + Rcpp::List tmp_imputer = out["imputer"]; + Rcpp::List tmp_indexer = out["indexer"]; + + if (has_IsoForest) + tmp_model["ser"] = serialize_cpp_obj(model.get()); + else + tmp_model["ser"] = serialize_cpp_obj(model_ext.get()); + + if (has_Imputer) + tmp_imputer["ser"] = serialize_cpp_obj(imputer.get()); + + if (has_Indexer) + tmp_indexer["ser"] = serialize_cpp_obj(indexer.get()); - if (has_IsoForest) { - out["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model.get()); - model.release(); + if (has_IsoForest) { + tmp_model["ptr"] = Rcpp::unwindProtect(safe_XPtr<IsoForest>, model.get()); + model.release(); + } + else { + tmp_model["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, model_ext.get()); + model_ext.release(); + } + if (has_Imputer) { + tmp_imputer["ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer.get()); + imputer.release(); + } + if (has_Indexer) { + tmp_indexer["ptr"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get()); + indexer.release(); + } + + out["model"] = tmp_model; + out["imputer"] = tmp_imputer; + out["indexer"] = tmp_indexer; } - else { - out["ptr"] = Rcpp::unwindProtect(safe_XPtr<ExtIsoForest>, model_ext.get()); - model_ext.release(); - } - if (has_Imputer) { - out["imp_ptr"] = Rcpp::unwindProtect(safe_XPtr<Imputer>, imputer.get()); - imputer.release(); - } - if (has_Indexer) { - out["indexer"] = Rcpp::unwindProtect(safe_XPtr<TreesIndexer>, indexer.get()); - indexer.release(); - } return out; } /* The functions below make for missing functionality in the @@ -2471,15 +2956,15 @@ // [[Rcpp::export(rng = false)]] int get_ntrees(SEXP model_R_ptr, bool is_extended) { if (is_extended) { - ExtIsoForest* ext_model_ptr = static_cast<ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr)); + const ExtIsoForest* ext_model_ptr = static_cast<const ExtIsoForest*>(R_ExternalPtrAddr(model_R_ptr)); return ext_model_ptr->hplanes.size(); } else { - IsoForest* model_ptr = static_cast<IsoForest*>(R_ExternalPtrAddr(model_R_ptr)); + const IsoForest* model_ptr = static_cast<const IsoForest*>(R_ExternalPtrAddr(model_R_ptr)); return model_ptr->trees.size(); } } // [[Rcpp::export(rng = false)]]