#include "mmap.h"

/*
 * NOTE(review): the targets of the system #include directives were missing
 * from this file as received. The headers below are reconstructed from the
 * calls this file makes (errno, open(), mmap()/munmap()/msync(), fstat(),
 * close()/truncate()/sysconf(), ruby_strdup()) -- confirm against the
 * project's canonical copy.
 */
#include <errno.h>
#include <fcntl.h>
#include <ruby/util.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include "file_format.h"
#include "utils.h"

/* Flip to "#if 1" to get line-numbered debug output on stdout. */
#if 0
#include <stdio.h>
#define DEBUGF(format, ...) printf("%d: " format "\n", __LINE__, __VA_ARGS__)
#else
#define DEBUGF(format, ...)
#endif

/* This is the ID of the WeakMap used to track strings allocated that
 * are backed by a memory-mapped file.
 */
#define WEAK_OBJ_TRACKER "@weak_obj_tracker"

/**
 * Maps a given VALUE to some key for the WeakMap. For now, we just use
 * the integer value as the key since that suffices, though this does
 * require Ruby 2.7 due to https://bugs.ruby-lang.org/issues/16035.
 */
static VALUE weak_obj_tracker_get_key(VALUE val) { return val; }

/**
 * Adds a T_STRING type to the WeakMap. The WeakMap should be stored
 * as an instance variable.
 *
 * obj: object holding the WeakMap in its WEAK_OBJ_TRACKER instance variable.
 * val: the String to register; a TypeError is raised by Check_Type() if it
 *      is not a T_STRING.
 */
static void weak_obj_tracker_add(VALUE obj, VALUE val) {
    Check_Type(val, T_STRING);

    VALUE tracker = rb_iv_get(obj, WEAK_OBJ_TRACKER);
    VALUE key = weak_obj_tracker_get_key(val);

    /* Equivalent to the Ruby expression: tracker[key] = val */
    rb_funcall(tracker, rb_intern("[]="), 2, key, val);
}

/**
 * Iterator function for updating a single element from the WeakMap.
 *
 * i:    the yielded value; must be a T_STRING.
 * self: the block's extra argument; must be the T_DATA mmap object.
 *
 * Points the String's heap buffer at the mapping's current address and
 * sets its length to the mapping's `real` size. Always returns Qtrue.
 */
VALUE mm_update_obj_i(RB_BLOCK_CALL_FUNC_ARGLIST(i, self)) {
    Check_Type(self, T_DATA);
    Check_Type(i, T_STRING);
    rb_check_arity(argc, 1, 1);

    mm_ipc *i_mm;
    GET_MMAP(self, i_mm, MM_MODIFY);

    RSTRING(i)->as.heap.ptr = i_mm->t->addr;
    RSTRING(i)->as.heap.len = i_mm->t->real;

    return Qtrue;
}

/**
 * This iterates through the WeakMap defined on the class and updates
 * the RStrings to use the newly-allocated memory region.
*/ void mm_update(VALUE obj) { VALUE tracker = rb_iv_get(obj, WEAK_OBJ_TRACKER); rb_block_call(tracker, rb_intern("each_value"), 0, NULL, mm_update_obj_i, obj); } typedef struct { VALUE obj, *argv; ID id; int flag, argc; } mm_bang; static VALUE mm_protect_bang(VALUE *t) { return rb_funcall2(t[0], (ID)t[1], (int)t[2], (VALUE *)t[3]); } static VALUE mm_recycle(VALUE str) { rb_gc_force_recycle(str); return str; } static VALUE mm_vunlock(VALUE obj) { mm_ipc *i_mm; GET_MMAP(obj, i_mm, 0); return Qnil; } static VALUE mm_str(VALUE obj, int modify) { mm_ipc *i_mm; VALUE ret = Qnil; GET_MMAP(obj, i_mm, modify & ~MM_ORIGIN); if (modify & MM_MODIFY) { if (i_mm->t->flag & MM_FROZEN) rb_error_frozen("mmap"); } ret = rb_obj_alloc(rb_cString); RSTRING(ret)->as.heap.ptr = i_mm->t->addr; RSTRING(ret)->as.heap.aux.capa = i_mm->t->len; RSTRING(ret)->as.heap.len = i_mm->t->real; weak_obj_tracker_add(obj, ret); DEBUGF("RString capa: %d, len: %d", RSTRING(ret)->as.heap.aux.capa, RSTRING(ret)->as.heap.len); if (modify & MM_ORIGIN) { #if HAVE_RB_DEFINE_ALLOC_FUNC RSTRING(ret)->as.heap.aux.shared = obj; FL_SET(ret, RSTRING_NOEMBED); FL_SET(ret, FL_USER18); #else RSTRING(ret)->orig = ret; #endif } if (i_mm->t->flag & MM_FROZEN) { ret = rb_obj_freeze(ret); } return ret; } static VALUE mm_i_bang(bang_st) mm_bang *bang_st; { VALUE str, res; mm_ipc *i_mm; str = mm_str(bang_st->obj, bang_st->flag); if (bang_st->flag & MM_PROTECT) { VALUE tmp[4]; tmp[0] = str; tmp[1] = (VALUE)bang_st->id; tmp[2] = (VALUE)bang_st->argc; tmp[3] = (VALUE)bang_st->argv; res = rb_ensure(mm_protect_bang, (VALUE)tmp, mm_recycle, str); } else { res = rb_funcall2(str, bang_st->id, bang_st->argc, bang_st->argv); RB_GC_GUARD(res); } if (res != Qnil) { GET_MMAP(bang_st->obj, i_mm, 0); i_mm->t->real = RSTRING_LEN(str); } return res; } static VALUE mm_bang_i(VALUE obj, int flag, ID id, int argc, VALUE *argv) { VALUE res; mm_ipc *i_mm; mm_bang bang_st; GET_MMAP(obj, i_mm, 0); if ((flag & MM_CHANGE) && (i_mm->t->flag & 
MM_FIXED)) { rb_raise(rb_eTypeError, "try to change the size of a fixed map"); } bang_st.obj = obj; bang_st.flag = flag; bang_st.id = id; bang_st.argc = argc; bang_st.argv = argv; if (i_mm->t->flag & MM_IPC) { res = rb_ensure(mm_i_bang, (VALUE)&bang_st, mm_vunlock, obj); } else { res = mm_i_bang(&bang_st); } if (res == Qnil) return res; return (flag & MM_ORIGIN) ? res : obj; } static void mm_free(mm_ipc *i_mm) { if (i_mm->t->path) { if (munmap(i_mm->t->addr, i_mm->t->len) != 0) { if (i_mm->t->path != (char *)-1 && i_mm->t->path != NULL) { free(i_mm->t->path); } free(i_mm); rb_raise(rb_eRuntimeError, "munmap failed at %s:%d with errno: %d", __FILE__, __LINE__, errno); } if (i_mm->t->path != (char *)-1) { if (i_mm->t->real < i_mm->t->len && i_mm->t->vscope != MAP_PRIVATE && truncate(i_mm->t->path, i_mm->t->real) == -1) { free(i_mm->t->path); free(i_mm); rb_raise(rb_eTypeError, "truncate"); } free(i_mm->t->path); } } free(i_mm); } /* * call-seq: * new(file) * * create a new Mmap object * * * file * * * Creates a mapping that's shared with all other processes * mapping the same areas of the file. 
* */ VALUE mm_s_new(int argc, VALUE *argv, VALUE obj) { VALUE res = rb_funcall2(obj, rb_intern("allocate"), 0, 0); rb_obj_call_init(res, argc, argv); return res; } VALUE mm_s_alloc(VALUE obj) { VALUE res; mm_ipc *i_mm; res = Data_Make_Struct(obj, mm_ipc, 0, mm_free, i_mm); i_mm->t = ALLOC_N(mm_mmap, 1); MEMZERO(i_mm->t, mm_mmap, 1); i_mm->t->fd = -1; return res; } size_t next_page_boundary(size_t value) { size_t page_size = sysconf(_SC_PAGESIZE); while (page_size < value) { page_size *= 2; } return page_size; } /* Reference implementations: * mozilla: https://hg.mozilla.org/mozilla-central/file/3d846420a907/xpcom/glue/FileUtils.cpp#l71 * glibc: https://github.com/lattera/glibc/blob/master/sysdeps/posix/posix_fallocate.c */ int reserve_mmap_file_bytes(int fd, size_t size) { #if __linux__ /* From https://stackoverflow.com/a/22820221: The difference with * ftruncate(2) is that (on file systems supporting it, e.g. Ext4) * disk space is indeed reserved by posix_fallocate but ftruncate * extends the file by adding holes (and without reserving disk * space). */ return posix_fallocate(fd, 0, size); #else /* We simplify the reference implemnetations since we generally * don't need to reserve more than a page size. 
*/ return ftruncate(fd, size); #endif } VALUE mm_init(VALUE obj, VALUE fname) { struct stat st; int fd, smode = 0, pmode = 0, vscope, perm, init; MMAP_RETTYPE addr; mm_ipc *i_mm; char *path; size_t size = 0; off_t offset; vscope = 0; path = 0; fd = -1; VALUE klass = rb_eval_string("ObjectSpace::WeakMap"); VALUE weak_obj_tracker = rb_class_new_instance(0, NULL, klass); rb_iv_set(obj, WEAK_OBJ_TRACKER, weak_obj_tracker); fname = rb_str_to_str(fname); SafeStringValue(fname); path = StringValuePtr(fname); vscope = MAP_SHARED; size = 0; perm = 0666; smode = O_RDWR; pmode = PROT_READ | PROT_WRITE; if ((fd = open(path, smode, perm)) == -1) { rb_raise(rb_eArgError, "Can't open %s", path); } if (fstat(fd, &st) == -1) { close(fd); rb_raise(rb_eArgError, "Can't stat %s", path); } size = st.st_size; Data_Get_Struct(obj, mm_ipc, i_mm); offset = 0; init = 0; if (size == 0) { init = 1; size = INITIAL_SIZE; } /* We need to ensure the underlying file descriptor is at least a page size. * Otherwise, we could get a SIGBUS error if mmap() attempts to read or write * past the file. */ size_t reserve_size = next_page_boundary(size); if (reserve_mmap_file_bytes(fd, reserve_size) != 0) { close(fd); rb_raise(rb_eIOError, "Can't reserve %zu bytes for memory-mapped file in %s", reserve_size, path); } addr = mmap(0, size, pmode, vscope, fd, offset); if (addr == MAP_FAILED || !addr) { close(fd); rb_raise(rb_eArgError, "mmap failed (%d)", errno); } i_mm->t->fd = fd; i_mm->t->addr = addr; i_mm->t->len = size; if (!init) { i_mm->t->real = size; } i_mm->t->pmode = pmode; i_mm->t->vscope = vscope; i_mm->t->smode = smode & ~O_TRUNC; i_mm->t->path = (path) ? 
ruby_strdup(path) : (char *)-1; if (smode == O_WRONLY) { i_mm->t->flag |= MM_FIXED; } return obj; } /* * Document-method: [] * Document-method: slice * * call-seq: [](args) * * Element reference - with the following syntax: * * self[nth] * * retrieve the nth character * * self[start..last] * * return a substring from start to last * * self[start, length] * * return a substring of lenght characters from start */ VALUE mm_aref_m(int argc, VALUE *argv, VALUE obj) { return mm_bang_i(obj, MM_ORIGIN, rb_intern("[]"), argc, argv); } /* * Document-method: msync * Document-method: sync * Document-method: flush * * call-seq: msync * * flush the file */ VALUE mm_msync(int argc, VALUE *argv, VALUE obj) { mm_ipc *i_mm; GET_MMAP(obj, i_mm, MM_MODIFY); VALUE oflag; int ret; int flag = MS_SYNC; if (argc) { rb_scan_args(argc, argv, "01", &oflag); flag = NUM2INT(oflag); } if ((ret = msync(i_mm->t->addr, i_mm->t->len, flag)) != 0) { rb_raise(rb_eArgError, "msync(%d)", ret); } return obj; } /* * Document-method: munmap * Document-method: unmap * * call-seq: munmap * * terminate the association */ VALUE mm_unmap(VALUE obj) { mm_ipc *i_mm; GET_MMAP(obj, i_mm, 0); if (i_mm->t->path) { if (munmap(i_mm->t->addr, i_mm->t->len) != 0) { if (i_mm->t->path != (char *)-1 && i_mm->t->path != NULL) { free(i_mm->t->path); i_mm->t->path = NULL; } rb_raise(rb_eRuntimeError, "munmap failed at %s:%d with errno: %d", __FILE__, __LINE__, errno); } if (i_mm->t->path != (char *)-1) { if (i_mm->t->real < i_mm->t->len && i_mm->t->vscope != MAP_PRIVATE && truncate(i_mm->t->path, i_mm->t->real) == -1) { rb_raise(rb_eTypeError, "truncate"); } free(i_mm->t->path); } // Ensure any lingering RString values get a length of zero. We // can't zero out the address since GET_MMAP() inside // mm_update_obj_i() expects a non-null address and path. i_mm->t->len = 0; i_mm->t->real = 0; mm_update(obj); i_mm->t->addr = NULL; i_mm->t->path = NULL; } close(i_mm->t->fd); return Qnil; }