/*
 * Suggested reading order:
 * 1. Skim Init_bootsnap
 * 2. Skim bs_fetch
 * 3. The rest of everything
 *
 * Init_bootsnap sets up the ruby objects and binds bs_fetch to
 * Bootsnap::CompileCache::Native.fetch.
 *
 * bs_fetch is the ultimate caller for just about every other function in
 * here.
 */

#include "bootsnap.h"
#include "ruby.h"
#include <stdint.h>
#include <stdbool.h>
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/stat.h>
#ifndef _WIN32
#include <sys/utsname.h>
#endif
#ifdef __GLIBC__
#include <gnu/libc-version.h>
#endif

/* 1000 is an arbitrary limit; FNV64 plus some slashes brings the cap down to
 * 981 for the cache dir */
#define MAX_CACHEPATH_SIZE 1000
#define MAX_CACHEDIR_SIZE  981
#define KEY_SIZE           64

/* Upper bound on the number of times a temp-file creation is attempted. */
#define MAX_CREATE_TEMPFILE_ATTEMPT 3

/* Fallback for rubies that do not provide a branch-prediction hint macro. */
#ifndef RB_UNLIKELY
  #define RB_UNLIKELY(x) (x)
#endif

/*
 * An instance of this key is written as the first 64 bytes of each cache file.
 * The mtime and size members track whether the file contents have changed, and
 * the version, ruby_platform, compile_option, and ruby_revision members track
 * changes to the environment that could invalidate compile results without
 * file contents having changed. The data_size member is not truly part of the
 * "key". Really, this could be called a "header" with the first six members
 * being an embedded "key" struct and an additional data_size member.
 *
 * The data_size indicates the remaining number of bytes in the cache file
 * after the header (the size of the cached artifact).
 *
 * After data_size, the struct is padded to 64 bytes.
 */
struct bs_cache_key {
  uint32_t version;
  uint32_t ruby_platform;
  uint32_t compile_option;
  uint32_t ruby_revision;
  uint64_t size;
  uint64_t mtime;
  uint64_t data_size; /* not used for equality */
  uint8_t  pad[24];   /* 16 + 24 + 24 bytes == KEY_SIZE */
} __attribute__((packed));

/*
 * If the struct padding isn't correct to pad the key to 64 bytes, refuse to
 * compile.
*/ #define STATIC_ASSERT(X) STATIC_ASSERT2(X,__LINE__) #define STATIC_ASSERT2(X,L) STATIC_ASSERT3(X,L) #define STATIC_ASSERT3(X,L) STATIC_ASSERT_MSG(X,at_line_##L) #define STATIC_ASSERT_MSG(COND,MSG) typedef char static_assertion_##MSG[(!!(COND))*2-1] STATIC_ASSERT(sizeof(struct bs_cache_key) == KEY_SIZE); /* Effectively a schema version. Bumping invalidates all previous caches */ static const uint32_t current_version = 4; /* hash of e.g. "x86_64-darwin17", invalidating when ruby is recompiled on a * new OS ABI, etc. */ static uint32_t current_ruby_platform; /* Invalidates cache when switching ruby versions */ static uint32_t current_ruby_revision; /* Invalidates cache when RubyVM::InstructionSequence.compile_option changes */ static uint32_t current_compile_option_crc32 = 0; /* Current umask */ static mode_t current_umask; /* Bootsnap::CompileCache::{Native, Uncompilable} */ static VALUE rb_mBootsnap; static VALUE rb_mBootsnap_CompileCache; static VALUE rb_mBootsnap_CompileCache_Native; static VALUE rb_cBootsnap_CompileCache_UNCOMPILABLE; static ID instrumentation_method; static VALUE sym_miss; static VALUE sym_stale; static bool instrumentation_enabled = false; /* Functions exposed as module functions on Bootsnap::CompileCache::Native */ static VALUE bs_instrumentation_enabled_set(VALUE self, VALUE enabled); static VALUE bs_compile_option_crc32_set(VALUE self, VALUE crc32_v); static VALUE bs_rb_fetch(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler, VALUE args); static VALUE bs_rb_precompile(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler); /* Helpers */ static void bs_cache_path(const char * cachedir, const VALUE path, char (* cache_path)[MAX_CACHEPATH_SIZE]); static int bs_read_key(int fd, struct bs_cache_key * key); static int cache_key_equal(struct bs_cache_key * k1, struct bs_cache_key * k2); static VALUE bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args); static VALUE bs_precompile(char * path, VALUE path_v, 
char * cache_path, VALUE handler); static int open_current_file(char * path, struct bs_cache_key * key, const char ** errno_provenance); static int fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE args, VALUE * output_data, int * exception_tag, const char ** errno_provenance); static uint32_t get_ruby_revision(void); static uint32_t get_ruby_platform(void); /* * Helper functions to call ruby methods on handler object without crashing on * exception. */ static int bs_storage_to_output(VALUE handler, VALUE args, VALUE storage_data, VALUE * output_data); static VALUE prot_input_to_output(VALUE arg); static void bs_input_to_output(VALUE handler, VALUE args, VALUE input_data, VALUE * output_data, int * exception_tag); static int bs_input_to_storage(VALUE handler, VALUE args, VALUE input_data, VALUE pathval, VALUE * storage_data); struct s2o_data; struct i2o_data; struct i2s_data; /* https://bugs.ruby-lang.org/issues/13667 */ extern VALUE rb_get_coverages(void); static VALUE bs_rb_coverage_running(VALUE self) { VALUE cov = rb_get_coverages(); return RTEST(cov) ? Qtrue : Qfalse; } /* * Ruby C extensions are initialized by calling Init_. * * This sets up the module hierarchy and attaches functions as methods. * * We also populate some semi-static information about the current OS and so on. 
*/ void Init_bootsnap(void) { rb_mBootsnap = rb_define_module("Bootsnap"); rb_mBootsnap_CompileCache = rb_define_module_under(rb_mBootsnap, "CompileCache"); rb_mBootsnap_CompileCache_Native = rb_define_module_under(rb_mBootsnap_CompileCache, "Native"); rb_cBootsnap_CompileCache_UNCOMPILABLE = rb_const_get(rb_mBootsnap_CompileCache, rb_intern("UNCOMPILABLE")); rb_global_variable(&rb_cBootsnap_CompileCache_UNCOMPILABLE); current_ruby_revision = get_ruby_revision(); current_ruby_platform = get_ruby_platform(); instrumentation_method = rb_intern("_instrument"); sym_miss = ID2SYM(rb_intern("miss")); rb_global_variable(&sym_miss); sym_stale = ID2SYM(rb_intern("stale")); rb_global_variable(&sym_stale); rb_define_module_function(rb_mBootsnap, "instrumentation_enabled=", bs_instrumentation_enabled_set, 1); rb_define_module_function(rb_mBootsnap_CompileCache_Native, "coverage_running?", bs_rb_coverage_running, 0); rb_define_module_function(rb_mBootsnap_CompileCache_Native, "fetch", bs_rb_fetch, 4); rb_define_module_function(rb_mBootsnap_CompileCache_Native, "precompile", bs_rb_precompile, 3); rb_define_module_function(rb_mBootsnap_CompileCache_Native, "compile_option_crc32=", bs_compile_option_crc32_set, 1); current_umask = umask(0777); umask(current_umask); } static VALUE bs_instrumentation_enabled_set(VALUE self, VALUE enabled) { instrumentation_enabled = RTEST(enabled); return enabled; } /* * Bootsnap's ruby code registers a hook that notifies us via this function * when compile_option changes. These changes invalidate all existing caches. * * Note that on 32-bit platforms, a CRC32 can't be represented in a Fixnum, but * can be represented by a uint. */ static VALUE bs_compile_option_crc32_set(VALUE self, VALUE crc32_v) { if (!RB_TYPE_P(crc32_v, T_BIGNUM) && !RB_TYPE_P(crc32_v, T_FIXNUM)) { Check_Type(crc32_v, T_FIXNUM); } current_compile_option_crc32 = NUM2UINT(crc32_v); return Qnil; } /* * We use FNV1a-64 to derive cache paths. 
The choice is somewhat arbitrary but * it has several nice properties: * * - Tiny implementation * - No external dependency * - Solid performance * - Solid randomness * - 32 bits doesn't feel collision-resistant enough; 64 is nice. */ static uint64_t fnv1a_64_iter_cstr(uint64_t h, const char *str) { unsigned char *s = (unsigned char *)str; while (*s) { h ^= (uint64_t)*s++; h += (h << 1) + (h << 4) + (h << 5) + (h << 7) + (h << 8) + (h << 40); } return h; } static uint64_t fnv1a_64_iter(uint64_t h, const VALUE str) { unsigned char *s = (unsigned char *)RSTRING_PTR(str); unsigned char *str_end = (unsigned char *)RSTRING_PTR(str) + RSTRING_LEN(str); while (s < str_end) { h ^= (uint64_t)*s++; h += (h << 1) + (h << 4) + (h << 5) + (h << 7) + (h << 8) + (h << 40); } return h; } static uint64_t fnv1a_64(const VALUE str) { uint64_t h = (uint64_t)0xcbf29ce484222325ULL; return fnv1a_64_iter(h, str); } /* * Ruby's revision may be Integer or String. CRuby 2.7 or later uses * Git commit ID as revision. It's String. */ static uint32_t get_ruby_revision(void) { VALUE ruby_revision; ruby_revision = rb_const_get(rb_cObject, rb_intern("RUBY_REVISION")); if (RB_TYPE_P(ruby_revision, RUBY_T_FIXNUM)) { return FIX2INT(ruby_revision); } else { uint64_t hash; hash = fnv1a_64(ruby_revision); return (uint32_t)(hash >> 32); } } /* * When ruby's version doesn't change, but it's recompiled on a different OS * (or OS version), we need to invalidate the cache. * * We actually factor in some extra information here, to be extra confident * that we don't try to re-use caches that will not be compatible, by factoring * in utsname.version. 
*/ static uint32_t get_ruby_platform(void) { uint64_t hash; VALUE ruby_platform; ruby_platform = rb_const_get(rb_cObject, rb_intern("RUBY_PLATFORM")); hash = fnv1a_64(ruby_platform); #ifdef _WIN32 return (uint32_t)(hash >> 32) ^ (uint32_t)GetVersion(); #elif defined(__GLIBC__) hash = fnv1a_64_iter_cstr(hash, gnu_get_libc_version()); return (uint32_t)(hash >> 32); #else struct utsname utsname; /* Not worth crashing if this fails; lose extra cache invalidation potential */ if (uname(&utsname) >= 0) { hash = fnv1a_64_iter_cstr(hash, utsname.version); } return (uint32_t)(hash >> 32); #endif } /* * Given a cache root directory and the full path to a file being cached, * generate a path under the cache directory at which the cached artifact will * be stored. * * The path will look something like: /12/34567890abcdef */ static void bs_cache_path(const char * cachedir, const VALUE path, char (* cache_path)[MAX_CACHEPATH_SIZE]) { uint64_t hash = fnv1a_64(path); uint8_t first_byte = (hash >> (64 - 8)); uint64_t remainder = hash & 0x00ffffffffffffff; sprintf(*cache_path, "%s/%02"PRIx8"/%014"PRIx64, cachedir, first_byte, remainder); } /* * Test whether a newly-generated cache key based on the file as it exists on * disk matches the one that was generated when the file was cached (or really * compare any two keys). * * The data_size member is not compared, as it serves more of a "header" * function. */ static int cache_key_equal(struct bs_cache_key * k1, struct bs_cache_key * k2) { return ( k1->version == k2->version && k1->ruby_platform == k2->ruby_platform && k1->compile_option == k2->compile_option && k1->ruby_revision == k2->ruby_revision && k1->size == k2->size && k1->mtime == k2->mtime ); } /* * Entrypoint for Bootsnap::CompileCache::Native.fetch. The real work is done * in bs_fetch; this function just performs some basic typechecks and * conversions on the ruby VALUE arguments before passing them along. 
*/ static VALUE bs_rb_fetch(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler, VALUE args) { FilePathValue(path_v); Check_Type(cachedir_v, T_STRING); Check_Type(path_v, T_STRING); if (RSTRING_LEN(cachedir_v) > MAX_CACHEDIR_SIZE) { rb_raise(rb_eArgError, "cachedir too long"); } char * cachedir = RSTRING_PTR(cachedir_v); char * path = RSTRING_PTR(path_v); char cache_path[MAX_CACHEPATH_SIZE]; /* generate cache path to cache_path */ bs_cache_path(cachedir, path_v, &cache_path); return bs_fetch(path, path_v, cache_path, handler, args); } /* * Entrypoint for Bootsnap::CompileCache::Native.precompile. * Similar to fetch, but it only generate the cache if missing * and doesn't return the content. */ static VALUE bs_rb_precompile(VALUE self, VALUE cachedir_v, VALUE path_v, VALUE handler) { FilePathValue(path_v); Check_Type(cachedir_v, T_STRING); Check_Type(path_v, T_STRING); if (RSTRING_LEN(cachedir_v) > MAX_CACHEDIR_SIZE) { rb_raise(rb_eArgError, "cachedir too long"); } char * cachedir = RSTRING_PTR(cachedir_v); char * path = RSTRING_PTR(path_v); char cache_path[MAX_CACHEPATH_SIZE]; /* generate cache path to cache_path */ bs_cache_path(cachedir, path_v, &cache_path); return bs_precompile(path, path_v, cache_path, handler); } /* * Open the file we want to load/cache and generate a cache key for it if it * was loaded. 
*/ static int open_current_file(char * path, struct bs_cache_key * key, const char ** errno_provenance) { struct stat statbuf; int fd; fd = open(path, O_RDONLY); if (fd < 0) { *errno_provenance = "bs_fetch:open_current_file:open"; return fd; } #ifdef _WIN32 setmode(fd, O_BINARY); #endif if (fstat(fd, &statbuf) < 0) { *errno_provenance = "bs_fetch:open_current_file:fstat"; close(fd); return -1; } key->version = current_version; key->ruby_platform = current_ruby_platform; key->compile_option = current_compile_option_crc32; key->ruby_revision = current_ruby_revision; key->size = (uint64_t)statbuf.st_size; key->mtime = (uint64_t)statbuf.st_mtime; return fd; } #define ERROR_WITH_ERRNO -1 #define CACHE_MISS -2 #define CACHE_STALE -3 #define CACHE_UNCOMPILABLE -4 /* * Read the cache key from the given fd, which must have position 0 (e.g. * freshly opened file). * * Possible return values: * - 0 (OK, key was loaded) * - ERROR_WITH_ERRNO (-1, errno is set) * - CACHE_MISS (-2) * - CACHE_STALE (-3) */ static int bs_read_key(int fd, struct bs_cache_key * key) { ssize_t nread = read(fd, key, KEY_SIZE); if (nread < 0) return ERROR_WITH_ERRNO; if (nread < KEY_SIZE) return CACHE_STALE; return 0; } /* * Open the cache file at a given path, if it exists, and read its key into the * struct. 
* * Possible return values: * - 0 (OK, key was loaded) * - CACHE_MISS (-2) * - CACHE_STALE (-3) * - ERROR_WITH_ERRNO (-1, errno is set) */ static int open_cache_file(const char * path, struct bs_cache_key * key, const char ** errno_provenance) { int fd, res; fd = open(path, O_RDONLY); if (fd < 0) { *errno_provenance = "bs_fetch:open_cache_file:open"; return CACHE_MISS; } #ifdef _WIN32 setmode(fd, O_BINARY); #endif res = bs_read_key(fd, key); if (res < 0) { *errno_provenance = "bs_fetch:open_cache_file:read"; close(fd); return res; } return fd; } /* * The cache file is laid out like: * 0...64 : bs_cache_key * 64..-1 : cached artifact * * This function takes a file descriptor whose position is pre-set to 64, and * the data_size (corresponding to the remaining number of bytes) listed in the * cache header. * * We load the text from this file into a buffer, and pass it to the ruby-land * handler with exception handling via the exception_tag param. * * Data is returned via the output_data parameter, which, if there's no error * or exception, will be the final data returnable to the user. */ static int fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE args, VALUE * output_data, int * exception_tag, const char ** errno_provenance) { char * data = NULL; ssize_t nread; int ret; VALUE storage_data; if (data_size > 100000000000) { *errno_provenance = "bs_fetch:fetch_cached_data:datasize"; errno = EINVAL; /* because wtf? 
*/ ret = ERROR_WITH_ERRNO; goto done; } data = ALLOC_N(char, data_size); nread = read(fd, data, data_size); if (nread < 0) { *errno_provenance = "bs_fetch:fetch_cached_data:read"; ret = ERROR_WITH_ERRNO; goto done; } if (nread != data_size) { ret = CACHE_STALE; goto done; } storage_data = rb_str_new(data, data_size); *exception_tag = bs_storage_to_output(handler, args, storage_data, output_data); if (*output_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) { ret = CACHE_UNCOMPILABLE; goto done; } ret = 0; done: if (data != NULL) xfree(data); return ret; } /* * Like mkdir -p, this recursively creates directory parents of a file. e.g. * given /a/b/c, creates /a and /a/b. */ static int mkpath(char * file_path, mode_t mode) { /* It would likely be more efficient to count back until we * find a component that *does* exist, but this will only run * at most 256 times, so it seems not worthwhile to change. */ char * p; for (p = strchr(file_path + 1, '/'); p; p = strchr(p + 1, '/')) { *p = '\0'; #ifdef _WIN32 if (mkdir(file_path) == -1) { #else if (mkdir(file_path, mode) == -1) { #endif if (errno != EEXIST) { *p = '/'; return -1; } } *p = '/'; } return 0; } /* * Write a cache header/key and a compiled artifact to a given cache path by * writing to a tmpfile and then renaming the tmpfile over top of the final * path. 
*/ static int atomic_write_cache_file(char * path, struct bs_cache_key * key, VALUE data, const char ** errno_provenance) { char template[MAX_CACHEPATH_SIZE + 20]; char * tmp_path; int fd, ret, attempt; ssize_t nwrite; for (attempt = 0; attempt < MAX_CREATE_TEMPFILE_ATTEMPT; ++attempt) { tmp_path = strncpy(template, path, MAX_CACHEPATH_SIZE); strcat(tmp_path, ".tmp.XXXXXX"); // mkstemp modifies the template to be the actual created path fd = mkstemp(tmp_path); if (fd > 0) break; if (attempt == 0 && mkpath(tmp_path, 0775) < 0) { *errno_provenance = "bs_fetch:atomic_write_cache_file:mkpath"; return -1; } } if (fd < 0) { *errno_provenance = "bs_fetch:atomic_write_cache_file:mkstemp"; return -1; } if (chmod(tmp_path, 0644) < 0) { *errno_provenance = "bs_fetch:atomic_write_cache_file:chmod"; return -1; } #ifdef _WIN32 setmode(fd, O_BINARY); #endif key->data_size = RSTRING_LEN(data); nwrite = write(fd, key, KEY_SIZE); if (nwrite < 0) { *errno_provenance = "bs_fetch:atomic_write_cache_file:write"; return -1; } if (nwrite != KEY_SIZE) { *errno_provenance = "bs_fetch:atomic_write_cache_file:keysize"; errno = EIO; /* Lies but whatever */ return -1; } nwrite = write(fd, RSTRING_PTR(data), RSTRING_LEN(data)); if (nwrite < 0) return -1; if (nwrite != RSTRING_LEN(data)) { *errno_provenance = "bs_fetch:atomic_write_cache_file:writelength"; errno = EIO; /* Lies but whatever */ return -1; } close(fd); ret = rename(tmp_path, path); if (ret < 0) { *errno_provenance = "bs_fetch:atomic_write_cache_file:rename"; return -1; } ret = chmod(path, 0664 & ~current_umask); if (ret < 0) { *errno_provenance = "bs_fetch:atomic_write_cache_file:chmod"; } return ret; } /* Read contents from an fd, whose contents are asserted to be +size+ bytes * long, into a buffer */ static ssize_t bs_read_contents(int fd, size_t size, char ** contents, const char ** errno_provenance) { ssize_t nread; *contents = ALLOC_N(char, size); nread = read(fd, *contents, size); if (nread < 0) { *errno_provenance = 
"bs_fetch:bs_read_contents:read"; } return nread; } /* * This is the meat of the extension. bs_fetch is * Bootsnap::CompileCache::Native.fetch. * * There are three "formats" in use here: * 1. "input" format, which is what we load from the source file; * 2. "storage" format, which we write to the cache; * 3. "output" format, which is what we return. * * E.g., For ISeq compilation: * input: ruby source, as text * storage: binary string (RubyVM::InstructionSequence#to_binary) * output: Instance of RubyVM::InstructionSequence * * And for YAML: * input: yaml as text * storage: MessagePack or Marshal text * output: ruby object, loaded from yaml/messagepack/marshal * * A handler passed in must support three messages: * * storage_to_output(S) -> O * * input_to_output(I) -> O * * input_to_storage(I) -> S * (input_to_storage may raise Bootsnap::CompileCache::Uncompilable, which * will prevent caching and cause output to be generated with * input_to_output) * * The semantics of this function are basically: * * return storage_to_output(cache[path]) if cache[path] * storage = input_to_storage(input) * cache[path] = storage * return storage_to_output(storage) * * Or expanded a bit: * * - Check if the cache file exists and is up to date. * - If it is, load this data to storage_data. * - return storage_to_output(storage_data) * - Read the file to input_data * - Generate storage_data using input_to_storage(input_data) * - Write storage_data data, with a cache key, to the cache file. * - Return storage_to_output(storage_data) */ static VALUE bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args) { struct bs_cache_key cached_key, current_key; char * contents = NULL; int cache_fd = -1, current_fd = -1; int res, valid_cache = 0, exception_tag = 0; const char * errno_provenance = NULL; VALUE input_data; /* data read from source file, e.g. YAML or ruby source */ VALUE storage_data; /* compiled data, e.g. 
msgpack / binary iseq */ VALUE output_data; /* return data, e.g. ruby hash or loaded iseq */ VALUE exception; /* ruby exception object to raise instead of returning */ /* Open the source file and generate a cache key for it */ current_fd = open_current_file(path, ¤t_key, &errno_provenance); if (current_fd < 0) goto fail_errno; /* Open the cache key if it exists, and read its cache key in */ cache_fd = open_cache_file(cache_path, &cached_key, &errno_provenance); if (cache_fd == CACHE_MISS || cache_fd == CACHE_STALE) { /* This is ok: valid_cache remains false, we re-populate it. */ if (RB_UNLIKELY(instrumentation_enabled)) { rb_funcall(rb_mBootsnap, instrumentation_method, 2, cache_fd == CACHE_MISS ? sym_miss : sym_stale, path_v); } } else if (cache_fd < 0) { goto fail_errno; } else { /* True if the cache existed and no invalidating changes have occurred since * it was generated. */ valid_cache = cache_key_equal(¤t_key, &cached_key); if (RB_UNLIKELY(instrumentation_enabled)) { if (!valid_cache) { rb_funcall(rb_mBootsnap, instrumentation_method, 2, sym_stale, path_v); } } } if (valid_cache) { /* Fetch the cache data and return it if we're able to load it successfully */ res = fetch_cached_data( cache_fd, (ssize_t)cached_key.data_size, handler, args, &output_data, &exception_tag, &errno_provenance ); if (exception_tag != 0) goto raise; else if (res == CACHE_UNCOMPILABLE) { /* If fetch_cached_data returned `Uncompilable` we fallback to `input_to_output` This happens if we have say, an unsafe YAML cache, but try to load it in safe mode */ if (bs_read_contents(current_fd, current_key.size, &contents, &errno_provenance) < 0) goto fail_errno; input_data = rb_str_new(contents, current_key.size); bs_input_to_output(handler, args, input_data, &output_data, &exception_tag); if (exception_tag != 0) goto raise; goto succeed; } else if (res == CACHE_MISS || res == CACHE_STALE) valid_cache = 0; else if (res == ERROR_WITH_ERRNO) goto fail_errno; else if (!NIL_P(output_data)) goto 
succeed; /* fast-path, goal */ } close(cache_fd); cache_fd = -1; /* Cache is stale, invalid, or missing. Regenerate and write it out. */ /* Read the contents of the source file into a buffer */ if (bs_read_contents(current_fd, current_key.size, &contents, &errno_provenance) < 0) goto fail_errno; input_data = rb_str_new(contents, current_key.size); /* Try to compile the input_data using input_to_storage(input_data) */ exception_tag = bs_input_to_storage(handler, args, input_data, path_v, &storage_data); if (exception_tag != 0) goto raise; /* If input_to_storage raised Bootsnap::CompileCache::Uncompilable, don't try * to cache anything; just return input_to_output(input_data) */ if (storage_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) { bs_input_to_output(handler, args, input_data, &output_data, &exception_tag); if (exception_tag != 0) goto raise; goto succeed; } /* If storage_data isn't a string, we can't cache it */ if (!RB_TYPE_P(storage_data, T_STRING)) goto invalid_type_storage_data; /* Attempt to write the cache key and storage_data to the cache directory. * We do however ignore any failures to persist the cache, as it's better * to move along, than to interrupt the process. */ atomic_write_cache_file(cache_path, ¤t_key, storage_data, &errno_provenance); /* Having written the cache, now convert storage_data to output_data */ exception_tag = bs_storage_to_output(handler, args, storage_data, &output_data); if (exception_tag != 0) goto raise; if (output_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) { /* If storage_to_output returned `Uncompilable` we fallback to `input_to_output` */ bs_input_to_output(handler, args, input_data, &output_data, &exception_tag); if (exception_tag != 0) goto raise; } else if (NIL_P(output_data)) { /* If output_data is nil, delete the cache entry and generate the output * using input_to_output */ if (unlink(cache_path) < 0) { /* If the cache was already deleted, it might be that another process did it before us. 
* No point raising an error */ if (errno != ENOENT) { errno_provenance = "bs_fetch:unlink"; goto fail_errno; } } bs_input_to_output(handler, args, input_data, &output_data, &exception_tag); if (exception_tag != 0) goto raise; } goto succeed; /* output_data is now the correct return. */ #define CLEANUP \ if (contents != NULL) xfree(contents); \ if (current_fd >= 0) close(current_fd); \ if (cache_fd >= 0) close(cache_fd); succeed: CLEANUP; return output_data; fail_errno: CLEANUP; exception = rb_syserr_new(errno, errno_provenance); rb_exc_raise(exception); __builtin_unreachable(); raise: CLEANUP; rb_jump_tag(exception_tag); __builtin_unreachable(); invalid_type_storage_data: CLEANUP; Check_Type(storage_data, T_STRING); __builtin_unreachable(); #undef CLEANUP } static VALUE bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler) { struct bs_cache_key cached_key, current_key; char * contents = NULL; int cache_fd = -1, current_fd = -1; int res, valid_cache = 0, exception_tag = 0; const char * errno_provenance = NULL; VALUE input_data; /* data read from source file, e.g. YAML or ruby source */ VALUE storage_data; /* compiled data, e.g. msgpack / binary iseq */ /* Open the source file and generate a cache key for it */ current_fd = open_current_file(path, ¤t_key, &errno_provenance); if (current_fd < 0) goto fail; /* Open the cache key if it exists, and read its cache key in */ cache_fd = open_cache_file(cache_path, &cached_key, &errno_provenance); if (cache_fd == CACHE_MISS || cache_fd == CACHE_STALE) { /* This is ok: valid_cache remains false, we re-populate it. */ } else if (cache_fd < 0) { goto fail; } else { /* True if the cache existed and no invalidating changes have occurred since * it was generated. */ valid_cache = cache_key_equal(¤t_key, &cached_key); } if (valid_cache) { goto succeed; } close(cache_fd); cache_fd = -1; /* Cache is stale, invalid, or missing. Regenerate and write it out. 
*/ /* Read the contents of the source file into a buffer */ if (bs_read_contents(current_fd, current_key.size, &contents, &errno_provenance) < 0) goto fail; input_data = rb_str_new(contents, current_key.size); /* Try to compile the input_data using input_to_storage(input_data) */ exception_tag = bs_input_to_storage(handler, Qnil, input_data, path_v, &storage_data); if (exception_tag != 0) goto fail; /* If input_to_storage raised Bootsnap::CompileCache::Uncompilable, don't try * to cache anything; just return false */ if (storage_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) { goto fail; } /* If storage_data isn't a string, we can't cache it */ if (!RB_TYPE_P(storage_data, T_STRING)) goto fail; /* Write the cache key and storage_data to the cache directory */ res = atomic_write_cache_file(cache_path, ¤t_key, storage_data, &errno_provenance); if (res < 0) goto fail; goto succeed; #define CLEANUP \ if (contents != NULL) xfree(contents); \ if (current_fd >= 0) close(current_fd); \ if (cache_fd >= 0) close(cache_fd); succeed: CLEANUP; return Qtrue; fail: CLEANUP; return Qfalse; #undef CLEANUP } /*****************************************************************************/ /********************* Handler Wrappers **************************************/ /***************************************************************************** * Everything after this point in the file is just wrappers to deal with ruby's * clunky method of handling exceptions from ruby methods invoked from C: * * In order to call a ruby method from C, while protecting against crashing in * the event of an exception, we must call the method with rb_protect(). * * rb_protect takes a C function and precisely one argument; however, we want * to pass multiple arguments, so we must create structs to wrap them up. * * These functions return an exception_tag, which, if non-zero, indicates an * exception that should be jumped to with rb_jump_tag after cleaning up * allocated resources. 
*/

/* Arguments for handler.storage_to_output(storage_data, args) */
struct s2o_data {
  VALUE handler;
  VALUE args;
  VALUE storage_data;
};

/* Arguments for handler.input_to_output(input_data, args) */
struct i2o_data {
  VALUE handler;
  VALUE args;
  VALUE input_data;
};

/* Arguments for handler.input_to_storage(input_data, pathval) */
struct i2s_data {
  VALUE handler;
  VALUE input_data;
  VALUE pathval;
};

/* rb_protect trampoline: unpack the s2o_data bundle and call the handler. */
static VALUE
try_storage_to_output(VALUE arg)
{
  struct s2o_data * call = (struct s2o_data *)arg;
  ID method = rb_intern("storage_to_output");

  return rb_funcall(call->handler, method, 2, call->storage_data, call->args);
}

/*
 * Call handler.storage_to_output under rb_protect.
 *
 * The result is stored in *output_data and the rb_protect state is returned:
 * zero on success, non-zero when the call raised.
 */
static int
bs_storage_to_output(VALUE handler, VALUE args, VALUE storage_data, VALUE * output_data)
{
  int exception_state;
  struct s2o_data call;

  call.handler = handler;
  call.args = args;
  call.storage_data = storage_data;

  *output_data = rb_protect(try_storage_to_output, (VALUE)&call, &exception_state);
  return exception_state;
}

/*
 * Call handler.input_to_output under rb_protect.
 *
 * Unlike its siblings this reports the rb_protect state through the
 * exception_tag out-parameter instead of the return value.
 */
static void
bs_input_to_output(VALUE handler, VALUE args, VALUE input_data, VALUE * output_data, int * exception_tag)
{
  struct i2o_data call;

  call.handler = handler;
  call.args = args;
  call.input_data = input_data;

  *output_data = rb_protect(prot_input_to_output, (VALUE)&call, exception_tag);
}

/* rb_protect trampoline: unpack the i2o_data bundle and call the handler. */
static VALUE
prot_input_to_output(VALUE arg)
{
  struct i2o_data * call = (struct i2o_data *)arg;
  ID method = rb_intern("input_to_output");

  return rb_funcall(call->handler, method, 2, call->input_data, call->args);
}

/* rb_protect trampoline: unpack the i2s_data bundle and call the handler. */
static VALUE
try_input_to_storage(VALUE arg)
{
  struct i2s_data * call = (struct i2s_data *)arg;
  ID method = rb_intern("input_to_storage");

  return rb_funcall(call->handler, method, 2, call->input_data, call->pathval);
}

/*
 * Call handler.input_to_storage under rb_protect.
 *
 * The result is stored in *storage_data and the rb_protect state is returned.
 * The +args+ parameter is accepted but not forwarded: the handler receives
 * only input_data and pathval.
 */
static int
bs_input_to_storage(VALUE handler, VALUE args, VALUE input_data, VALUE pathval, VALUE * storage_data)
{
  int exception_state;
  struct i2s_data call;

  call.handler = handler;
  call.input_data = input_data;
  call.pathval = pathval;

  *storage_data = rb_protect(try_input_to_storage, (VALUE)&call, &exception_state);
  return exception_state;
}