#include <ruby.h>
#include <ruby/thread.h>
#include <pthread.h>
#include <errno.h>
#include "helpers.h"
#include "stack_recorder.h"
#include "libdatadog_helpers.h"
#include "ruby_helpers.h"
#include "time_helpers.h"
#include "heap_recorder.h"

// Used to wrap a ddog_prof_Profile in a Ruby object and expose Ruby-level serialization APIs
// This file implements the native bits of the Datadog::Profiling::StackRecorder class

// ---
// ## Synchronization mechanism for safe parallel access design notes
//
// The state of the StackRecorder is managed using a set of locks to avoid concurrency issues.
//
// This is needed because the state is expected to be accessed, in parallel, by two different threads.
//
// 1. The thread that is taking a stack sample and that called `record_sample`, let's call it the **sampler thread**.
// In the current implementation of the profiler, there can only exist one **sampler thread** at a time; if this
// constraint changes, we should revise the design of the StackRecorder.
//
// 2. The thread that serializes and reports profiles, let's call it the **serializer thread**. We enforce that there
// cannot be more than one thread attempting to serialize profiles at a time.
//
// If both the sampler and serializer threads are trying to access the same `ddog_prof_Profile` in parallel, we will
// have a concurrency issue. Thus, the StackRecorder has an added mechanism to avoid this.
//
// As an additional constraint, the **sampler thread** has absolute priority and must never block while
// recording a sample.
//
// ### The solution: Keep two profiles at the same time
//
// To solve for the constraints above, the StackRecorder keeps two `ddog_prof_Profile` profile instances inside itself.
// They are called the `slot_one_profile` and `slot_two_profile`.
//
// Each profile is paired with its own mutex: `slot_one_profile` is protected by `slot_one_mutex` and `slot_two_profile`
// is protected by `slot_two_mutex`.
//
// We additionally introduce the concept of **active** and **inactive** profile slots. At any point, the sampler thread
// can probe the mutexes to discover which of the profiles corresponds to the active slot, and then record samples in it.
// When the serializer thread is ready to serialize data, it flips the active and inactive slots; it reports the data
// on the previously-active profile slot, and the sampler thread can continue to record in the previously-inactive
// profile slot.
//
// Thus, the sampler and serializer threads never cross paths, avoiding concurrency issues. The sampler thread writes to
// the active profile slot, and the serializer thread reads from the inactive profile slot.
//
// ### Locking protocol, high-level
//
// The active profile slot is the slot whose corresponding mutex **is unlocked**. That is, if the sampler
// thread can grab the lock for a profile slot, then that slot is the active one. (Here you see where the constraint
// stated above that only one sampler thread can exist kicks in -- this part would need to be more complex if multiple
// sampler threads were in play.)
//
// As a counterpart, the inactive profile slot mutex is **kept locked** until such time as the serializer
// thread is ready to work and decides to flip the slots.
//
// When a new StackRecorder is initialized, the `slot_one_mutex` is unlocked, and the `slot_two_mutex` is kept locked,
// that is, a new instance always starts with slot one active.
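//
// To make the invariant concrete, here's a minimal illustrative sketch (not additional API; it simply mirrors
// `initialize_slot_concurrency_control` and `sampler_lock_active_profile` further down in this file):
//
// ```
// pthread_mutex_t mutex_slot_one = PTHREAD_MUTEX_INITIALIZER; // left unlocked => slot one starts active
// pthread_mutex_t mutex_slot_two = PTHREAD_MUTEX_INITIALIZER;
// pthread_mutex_lock(&mutex_slot_two);                        // kept locked  => slot two starts inactive
//
// // The sampler discovers the active slot by probing with trylock, so it never blocks:
// if (pthread_mutex_trylock(&mutex_slot_one) == 0) { /* slot one is active; record into it, then unlock */ }
// else if (pthread_mutex_trylock(&mutex_slot_two) == 0) { /* slot two is active; record into it, then unlock */ }
// ```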
//
// Additionally, an `active_slot` field is kept, containing a `1` or `2`; this is only kept for the serializer thread
// to use as a simplification, as well as for testing and debugging; the **sampler thread must never use the `active_slot`
// field**.
//
// ### Locking protocol, from the sampler thread side
//
// When the sampler thread wants to record a sample, it goes through the following steps to discover which is the
// active profile slot:
//
// 1. `pthread_mutex_trylock(slot_one_mutex)`. If it succeeds in grabbing the lock, this means the active profile slot is
// slot one. If it fails, we move to the next step.
//
// 2. `pthread_mutex_trylock(slot_two_mutex)`. If it succeeds in grabbing the lock, this means the active profile slot is
// slot two. If it fails, we move to the next step.
//
// 3. What does it mean for the sampler thread to have observed both `slot_one_mutex` and `slot_two_mutex` as
// being locked? There are two options:
// a. The sampler thread got really unlucky. When it tried to grab the `slot_one_mutex`, the active profile slot was
// the second one BUT then the serializer thread flipped the slots, and by the time the sampler thread probed the
// `slot_two_mutex`, that one was taken. Since the serializer thread is expected to run only once a minute,
// we retry steps 1 and 2 and should be able to find an active slot.
// b. Something is incorrect in the StackRecorder state. In this situation, the sampler thread should give up on
// sampling and enter an error state.
//
// Note that, because the sampler thread uses `trylock` to probe the mutexes in the steps above, the sampler thread
// never blocks. It either finds an active profile slot in a bounded number of steps, or it enters an error state.
//
// This guarantees that sampler performance is never constrained by serializer performance.
//
// ### Locking protocol, from the serializer thread side
//
// When the serializer thread wants to serialize a profile, it first flips the active and inactive profile slots.
//
// The flipping action is described below. Consider previously-inactive and previously-active as the state of the slots
// before the flipping happens.
//
// The flipping steps are the following:
//
// 1. Release the mutex for the previously-inactive profile slot. That slot, as seen by the sampler thread, is now
// active.
//
// 2. Grab the mutex for the previously-active profile slot. Note that this can lead to the serializer thread blocking,
// if the sampler thread is holding this mutex. After the mutex is grabbed, the previously-active slot becomes inactive,
// as seen by the sampler thread.
//
// 3. Update `active_slot`.
//
// After flipping the profile slots, the serializer thread is now free to serialize the inactive profile slot. The slot
// is kept inactive until the next time the serializer thread wants to serialize data.
//
// Note there can be a brief period between steps 1 and 2 where the serializer thread holds no lock, which means that
// the sampler thread can pick either slot. This is OK: if the sampler thread picks the previously-inactive slot, the
// samples will be reported on the next serialization; if the sampler thread picks the previously-active slot, the
// samples are still included in the current serialization. Either option is correct.
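//
// As a simplified sketch of this flip (the real implementation, with error checking, is
// `serializer_flip_active_and_inactive_slots` below):
//
// ```
// // Assume slot one was active (mutex one unlocked) and slot two was inactive (mutex two locked).
// pthread_mutex_unlock(&mutex_slot_two); // Step 1: previously-inactive slot two is now active
// pthread_mutex_lock(&mutex_slot_one);   // Step 2: may block until the sampler unlocks; slot one is now inactive
// active_slot = 2;                       // Step 3: bookkeeping for the serializer thread and for tests only
// // Slot one can now be serialized without racing with the sampler thread.
// ```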
//
// ### Additional notes
//
// Q: Can the sampler thread and the serializer thread ever be the same thread? (E.g. sampling in an interrupt handler)
// A: No; the current profiler design requires that sampling happens only on the thread that is holding the Global VM
// Lock (GVL). The slot flipping happens only after the serializer thread has released the GVL, so at that point it
// cannot also be the thread taking samples.
//
// ---

static VALUE ok_symbol = Qnil; // :ok in Ruby
static VALUE error_symbol = Qnil; // :error in Ruby

// Note: Please DO NOT use `VALUE_STRING` anywhere else; instead use `DDOG_CHARSLICE_C`.
// `VALUE_STRING` is only needed because older versions of gcc (4.9.2, used in our Ruby 2.2 CI test images)
// tripped when compiling `enabled_value_types` using `-std=gnu99` due to the extra cast that is included in
// `DDOG_CHARSLICE_C`, with the following error:
//
// ```
// compiling ../../../../ext/ddtrace_profiling_native_extension/stack_recorder.c
// ../../../../ext/ddtrace_profiling_native_extension/stack_recorder.c:23:1: error: initializer element is not constant
// static const ddog_prof_ValueType enabled_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE};
// ^
// ```
#define VALUE_STRING(string) {.ptr = "" string, .len = sizeof(string) - 1}

#define CPU_TIME_VALUE {.type_ = VALUE_STRING("cpu-time"), .unit = VALUE_STRING("nanoseconds")}
#define CPU_TIME_VALUE_ID 0
#define CPU_SAMPLES_VALUE {.type_ = VALUE_STRING("cpu-samples"), .unit = VALUE_STRING("count")}
#define CPU_SAMPLES_VALUE_ID 1
#define WALL_TIME_VALUE {.type_ = VALUE_STRING("wall-time"), .unit = VALUE_STRING("nanoseconds")}
#define WALL_TIME_VALUE_ID 2
#define ALLOC_SAMPLES_VALUE {.type_ = VALUE_STRING("alloc-samples"), .unit = VALUE_STRING("count")}
#define ALLOC_SAMPLES_VALUE_ID 3
#define HEAP_SAMPLES_VALUE {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")}
#define HEAP_SAMPLES_VALUE_ID 4
#define HEAP_SIZE_VALUE {.type_ = VALUE_STRING("heap-live-size"), .unit = VALUE_STRING("bytes")}
#define HEAP_SIZE_VALUE_ID 5
#define TIMELINE_VALUE {.type_ = VALUE_STRING("timeline"), .unit = VALUE_STRING("nanoseconds")}
#define TIMELINE_VALUE_ID 6

static const ddog_prof_ValueType all_value_types[] =
  {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, HEAP_SIZE_VALUE, TIMELINE_VALUE};

// This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it
// occupies on the all_value_types array.
// E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types.
static const uint8_t all_value_types_positions[] =
  {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, HEAP_SIZE_VALUE_ID, TIMELINE_VALUE_ID};

#define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType))

// Struct for storing stats related to a profile in a particular slot.
// These stats will share the same lifetime as the data in that profile slot.
typedef struct slot_stats { // How many individual samples were recorded into this slot (un-weighted) uint64_t recorded_samples; } stats_slot; typedef struct profile_slot { ddog_prof_Profile profile; stats_slot stats; } profile_slot; // Contains native state for each instance struct stack_recorder_state { // Heap recorder instance heap_recorder *heap_recorder; pthread_mutex_t mutex_slot_one; profile_slot profile_slot_one; pthread_mutex_t mutex_slot_two; profile_slot profile_slot_two; short active_slot; // MUST NEVER BE ACCESSED FROM record_sample; this is NOT for the sampler thread to use. uint8_t position_for[ALL_VALUE_TYPES_COUNT]; uint8_t enabled_values_count; // Struct for storing stats related to behaviour of a stack recorder instance during its entire lifetime. struct lifetime_stats { // How many profiles have we serialized successfully so far uint64_t serialization_successes; // How many profiles have we serialized unsuccessfully so far uint64_t serialization_failures; // Stats on profile serialization time long serialization_time_ns_min; long serialization_time_ns_max; uint64_t serialization_time_ns_total; } stats_lifetime; }; // Used to group mutex and the corresponding profile slot for easy unlocking after work is done. typedef struct locked_profile_slot { pthread_mutex_t *mutex; profile_slot *data; } locked_profile_slot; struct call_serialize_without_gvl_arguments { // Set by caller struct stack_recorder_state *state; ddog_Timespec finish_timestamp; // Set by callee profile_slot *slot; ddog_prof_Profile_SerializeResult result; long heap_profile_build_time_ns; long serialize_no_gvl_time_ns; // Set by both bool serialize_ran; }; static VALUE _native_new(VALUE klass); static void initialize_slot_concurrency_control(struct stack_recorder_state *state); static void initialize_profiles(struct stack_recorder_state *state, ddog_prof_Slice_ValueType sample_types); static void stack_recorder_typed_data_free(void *data); static VALUE _native_initialize( DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled, VALUE heap_samples_enabled, VALUE heap_size_enabled, VALUE heap_sample_every, VALUE timeline_enabled ); static VALUE _native_serialize(VALUE self, VALUE recorder_instance); static VALUE ruby_time_from(ddog_Timespec ddprof_time); static void *call_serialize_without_gvl(void *call_args); static locked_profile_slot sampler_lock_active_profile(struct stack_recorder_state *state); static void sampler_unlock_active_profile(locked_profile_slot active_slot); static profile_slot* serializer_flip_active_and_inactive_slots(struct stack_recorder_state *state); static VALUE _native_active_slot(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); static VALUE _native_is_slot_one_mutex_locked(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); static VALUE _native_is_slot_two_mutex_locked(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); static VALUE test_slot_mutex_state(VALUE recorder_instance, int slot); static ddog_Timespec system_epoch_now_timespec(void); static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_instance); static void serializer_set_start_timestamp_for_next_profile(struct stack_recorder_state *state, ddog_Timespec start_time); static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint); static void reset_profile_slot(profile_slot *slot, ddog_Timespec *start_time /* Can be null */); static VALUE _native_track_object(DDTRACE_UNUSED 
VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight, VALUE alloc_class); static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locations); static VALUE _native_start_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); static VALUE _native_end_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj); static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj); static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE instance); static VALUE build_profile_stats(profile_slot *slot, long serialization_time_ns, long heap_iteration_prep_time_ns, long heap_profile_build_time_ns); void stack_recorder_init(VALUE profiling_module) { VALUE stack_recorder_class = rb_define_class_under(profiling_module, "StackRecorder", rb_cObject); // Hosts methods used for testing the native code using RSpec VALUE testing_module = rb_define_module_under(stack_recorder_class, "Testing"); // Instances of the StackRecorder class are "TypedData" objects. // "TypedData" objects are special objects in the Ruby VM that can wrap C structs. // In this case, it wraps the stack_recorder_state. // // Because Ruby doesn't know how to initialize native-level structs, we MUST override the allocation function for objects // of this class so that we can manage this part. Not overriding or disabling the allocation function is a common // gotcha for "TypedData" objects that can very easily lead to VM crashes, see for instance // https://bugs.ruby-lang.org/issues/18007 for a discussion around this. rb_define_alloc_func(stack_recorder_class, _native_new); rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 7); rb_define_singleton_method(stack_recorder_class, "_native_serialize", _native_serialize, 1); rb_define_singleton_method(stack_recorder_class, "_native_reset_after_fork", _native_reset_after_fork, 1); rb_define_singleton_method(stack_recorder_class, "_native_stats", _native_stats, 1); rb_define_singleton_method(testing_module, "_native_active_slot", _native_active_slot, 1); rb_define_singleton_method(testing_module, "_native_slot_one_mutex_locked?", _native_is_slot_one_mutex_locked, 1); rb_define_singleton_method(testing_module, "_native_slot_two_mutex_locked?", _native_is_slot_two_mutex_locked, 1); rb_define_singleton_method(testing_module, "_native_record_endpoint", _native_record_endpoint, 3); rb_define_singleton_method(testing_module, "_native_track_object", _native_track_object, 4); rb_define_singleton_method(testing_module, "_native_check_heap_hashes", _native_check_heap_hashes, 1); rb_define_singleton_method(testing_module, "_native_start_fake_slow_heap_serialization", _native_start_fake_slow_heap_serialization, 1); rb_define_singleton_method(testing_module, "_native_end_fake_slow_heap_serialization", _native_end_fake_slow_heap_serialization, 1); rb_define_singleton_method(testing_module, "_native_debug_heap_recorder", _native_debug_heap_recorder, 1); rb_define_singleton_method(testing_module, "_native_gc_force_recycle", _native_gc_force_recycle, 1); rb_define_singleton_method(testing_module, "_native_has_seen_id_flag", _native_has_seen_id_flag, 1); ok_symbol = ID2SYM(rb_intern_const("ok")); error_symbol = ID2SYM(rb_intern_const("error")); } // This structure is used to define a Ruby object that stores a pointer 
// to the `stack_recorder_state` struct (which contains the `ddog_prof_Profile` slots)
// See also https://github.com/ruby/ruby/blob/master/doc/extension.rdoc for how this works
static const rb_data_type_t stack_recorder_typed_data = {
  .wrap_struct_name = "Datadog::Profiling::StackRecorder",
  .function = {
    .dfree = stack_recorder_typed_data_free,
    .dsize = NULL, // We don't track profile memory usage (although it'd be cool if we did!)
    // No need to provide dmark nor dcompact because we don't directly reference Ruby VALUEs from inside this object
  },
  .flags = RUBY_TYPED_FREE_IMMEDIATELY
};

static VALUE _native_new(VALUE klass) {
  struct stack_recorder_state *state = ruby_xcalloc(1, sizeof(struct stack_recorder_state));

  // Note: Any exceptions raised from this point until the TypedData_Wrap_Struct call will lead to the state memory
  // being leaked.

  ddog_prof_Slice_ValueType sample_types = {.ptr = all_value_types, .len = ALL_VALUE_TYPES_COUNT};

  initialize_slot_concurrency_control(state);

  for (uint8_t i = 0; i < ALL_VALUE_TYPES_COUNT; i++) { state->position_for[i] = all_value_types_positions[i]; }
  state->enabled_values_count = ALL_VALUE_TYPES_COUNT;

  state->stats_lifetime = (struct lifetime_stats) {
    .serialization_time_ns_min = INT64_MAX,
  };

  // Note: At this point, slot_one_profile and slot_two_profile contain null pointers. Libdatadog validates pointers
  // before using them so it's ok for us to go ahead and create the StackRecorder object.

  VALUE stack_recorder = TypedData_Wrap_Struct(klass, &stack_recorder_typed_data, state);

  // NOTE: We initialize this because we want a new recorder to be operational even without initialization and our
  // default is everything enabled. However, if during recording initialization it turns out we don't want
  // heap samples, we will free and reset heap_recorder to NULL, effectively disabling all behaviour specific
  // to heap profiling (all calls to heap_recorder_* with a NULL heap recorder are noops).
  state->heap_recorder = heap_recorder_new();

  // Note: Don't raise exceptions after this point, since it'll lead to libdatadog memory leaking!

  initialize_profiles(state, sample_types);

  return stack_recorder;
}

static void initialize_slot_concurrency_control(struct stack_recorder_state *state) {
  state->mutex_slot_one = (pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER;
  state->mutex_slot_two = (pthread_mutex_t) PTHREAD_MUTEX_INITIALIZER;

  // A newly-created StackRecorder starts with slot one being active for samples, so let's lock slot two
  ENFORCE_SUCCESS_GVL(pthread_mutex_lock(&state->mutex_slot_two));

  state->active_slot = 1;
}

static void initialize_profiles(struct stack_recorder_state *state, ddog_prof_Slice_ValueType sample_types) {
  ddog_prof_Profile_NewResult slot_one_profile_result =
    ddog_prof_Profile_new(sample_types, NULL /* period is optional */, NULL /* start_time is optional */);

  if (slot_one_profile_result.tag == DDOG_PROF_PROFILE_NEW_RESULT_ERR) {
    rb_raise(rb_eRuntimeError, "Failed to initialize slot one profile: %"PRIsVALUE, get_error_details_and_drop(&slot_one_profile_result.err));
  }

  ddog_prof_Profile_NewResult slot_two_profile_result =
    ddog_prof_Profile_new(sample_types, NULL /* period is optional */, NULL /* start_time is optional */);

  if (slot_two_profile_result.tag == DDOG_PROF_PROFILE_NEW_RESULT_ERR) {
    // Uff! Tough spot. We need to make sure to properly clean up the other profile as well first
    ddog_prof_Profile_drop(&slot_one_profile_result.ok);
    // And now we can raise...
rb_raise(rb_eRuntimeError, "Failed to initialize slot two profile: %"PRIsVALUE, get_error_details_and_drop(&slot_two_profile_result.err)); } state->profile_slot_one = (profile_slot) { .profile = slot_one_profile_result.ok, }; state->profile_slot_two = (profile_slot) { .profile = slot_two_profile_result.ok, }; } static void stack_recorder_typed_data_free(void *state_ptr) { struct stack_recorder_state *state = (struct stack_recorder_state *) state_ptr; pthread_mutex_destroy(&state->mutex_slot_one); ddog_prof_Profile_drop(&state->profile_slot_one.profile); pthread_mutex_destroy(&state->mutex_slot_two); ddog_prof_Profile_drop(&state->profile_slot_two.profile); heap_recorder_free(state->heap_recorder); ruby_xfree(state); } static VALUE _native_initialize( DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE cpu_time_enabled, VALUE alloc_samples_enabled, VALUE heap_samples_enabled, VALUE heap_size_enabled, VALUE heap_sample_every, VALUE timeline_enabled ) { ENFORCE_BOOLEAN(cpu_time_enabled); ENFORCE_BOOLEAN(alloc_samples_enabled); ENFORCE_BOOLEAN(heap_samples_enabled); ENFORCE_BOOLEAN(heap_size_enabled); ENFORCE_TYPE(heap_sample_every, T_FIXNUM); ENFORCE_BOOLEAN(timeline_enabled); struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); heap_recorder_set_sample_rate(state->heap_recorder, NUM2INT(heap_sample_every)); uint8_t requested_values_count = ALL_VALUE_TYPES_COUNT - (cpu_time_enabled == Qtrue ? 0 : 1) - (alloc_samples_enabled == Qtrue? 0 : 1) - (heap_samples_enabled == Qtrue ? 0 : 1) - (heap_size_enabled == Qtrue ? 0 : 1) - (timeline_enabled == Qtrue ? 0 : 1); if (requested_values_count == ALL_VALUE_TYPES_COUNT) return Qtrue; // Nothing to do, this is the default // When some sample types are disabled, we need to reconfigure libdatadog to record less types, // as well as reconfigure the position_for array to push the disabled types to the end so they don't get recorded. // See record_sample for details on the use of position_for. 
state->enabled_values_count = requested_values_count; ddog_prof_ValueType enabled_value_types[ALL_VALUE_TYPES_COUNT]; uint8_t next_enabled_pos = 0; uint8_t next_disabled_pos = requested_values_count; // CPU_SAMPLES_VALUE is always enabled enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) CPU_SAMPLES_VALUE; state->position_for[CPU_SAMPLES_VALUE_ID] = next_enabled_pos++; // WALL_TIME_VALUE is always enabled enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) WALL_TIME_VALUE; state->position_for[WALL_TIME_VALUE_ID] = next_enabled_pos++; if (cpu_time_enabled == Qtrue) { enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) CPU_TIME_VALUE; state->position_for[CPU_TIME_VALUE_ID] = next_enabled_pos++; } else { state->position_for[CPU_TIME_VALUE_ID] = next_disabled_pos++; } if (alloc_samples_enabled == Qtrue) { enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) ALLOC_SAMPLES_VALUE; state->position_for[ALLOC_SAMPLES_VALUE_ID] = next_enabled_pos++; } else { state->position_for[ALLOC_SAMPLES_VALUE_ID] = next_disabled_pos++; } if (heap_samples_enabled == Qtrue) { enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SAMPLES_VALUE; state->position_for[HEAP_SAMPLES_VALUE_ID] = next_enabled_pos++; } else { state->position_for[HEAP_SAMPLES_VALUE_ID] = next_disabled_pos++; } if (heap_size_enabled == Qtrue) { enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SIZE_VALUE; state->position_for[HEAP_SIZE_VALUE_ID] = next_enabled_pos++; } else { state->position_for[HEAP_SIZE_VALUE_ID] = next_disabled_pos++; } heap_recorder_set_size_enabled(state->heap_recorder, heap_size_enabled); if (heap_samples_enabled == Qfalse && heap_size_enabled == Qfalse) { // Turns out heap sampling is disabled but we initialized everything in _native_new // assuming all samples were enabled. We need to deinitialize the heap recorder. heap_recorder_free(state->heap_recorder); state->heap_recorder = NULL; } if (timeline_enabled == Qtrue) { enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) TIMELINE_VALUE; state->position_for[TIMELINE_VALUE_ID] = next_enabled_pos++; } else { state->position_for[TIMELINE_VALUE_ID] = next_disabled_pos++; } ddog_prof_Profile_drop(&state->profile_slot_one.profile); ddog_prof_Profile_drop(&state->profile_slot_two.profile); ddog_prof_Slice_ValueType sample_types = {.ptr = enabled_value_types, .len = state->enabled_values_count}; initialize_profiles(state, sample_types); return Qtrue; } static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); ddog_Timespec finish_timestamp = system_epoch_now_timespec(); // Need to do this while still holding on to the Global VM Lock; see comments on method for why serializer_set_start_timestamp_for_next_profile(state, finish_timestamp); long heap_iteration_prep_start_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE); // Prepare the iteration on heap recorder we'll be doing outside the GVL. The preparation needs to // happen while holding on to the GVL. 
heap_recorder_prepare_iteration(state->heap_recorder); long heap_iteration_prep_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - heap_iteration_prep_start_time_ns; // We'll release the Global VM Lock while we're calling serialize, so that the Ruby VM can continue to work while this // is pending struct call_serialize_without_gvl_arguments args = { .state = state, .finish_timestamp = finish_timestamp, .serialize_ran = false }; while (!args.serialize_ran) { // Give the Ruby VM an opportunity to process any pending interruptions (including raising exceptions). // Note that it's OK to do this BEFORE call_serialize_without_gvl runs BUT NOT AFTER because afterwards // there's heap-allocated memory that MUST be cleaned before raising any exception. // // Note that we run this in a loop because `rb_thread_call_without_gvl2` may return multiple times due to // pending interrupts until it actually runs our code. process_pending_interruptions(Qnil); // We use rb_thread_call_without_gvl2 here because unlike the regular _gvl variant, gvl2 does not process // interruptions and thus does not raise exceptions after running our code. rb_thread_call_without_gvl2(call_serialize_without_gvl, &args, NULL /* No interruption function needed in this case */, NULL /* Not needed */); } // Cleanup after heap recorder iteration. This needs to happen while holding on to the GVL. heap_recorder_finish_iteration(state->heap_recorder); // NOTE: We are focusing on the serialization time outside of the GVL in this stat here. This doesn't // really cover the full serialization process but it gives a more useful number since it bypasses // the noise of acquiring GVLs and dealing with interruptions which is highly specific to runtime // conditions and over which we really have no control about. long serialization_time_ns = args.serialize_no_gvl_time_ns; if (serialization_time_ns >= 0) { // Only update stats if our serialization time is valid. 
state->stats_lifetime.serialization_time_ns_max = long_max_of(state->stats_lifetime.serialization_time_ns_max, serialization_time_ns); state->stats_lifetime.serialization_time_ns_min = long_min_of(state->stats_lifetime.serialization_time_ns_min, serialization_time_ns); state->stats_lifetime.serialization_time_ns_total += serialization_time_ns; } ddog_prof_Profile_SerializeResult serialized_profile = args.result; if (serialized_profile.tag == DDOG_PROF_PROFILE_SERIALIZE_RESULT_ERR) { state->stats_lifetime.serialization_failures++; return rb_ary_new_from_args(2, error_symbol, get_error_details_and_drop(&serialized_profile.err)); } state->stats_lifetime.serialization_successes++; VALUE encoded_pprof = ruby_string_from_vec_u8(serialized_profile.ok.buffer); ddog_Timespec ddprof_start = serialized_profile.ok.start; ddog_Timespec ddprof_finish = serialized_profile.ok.end; ddog_prof_EncodedProfile_drop(&serialized_profile.ok); VALUE start = ruby_time_from(ddprof_start); VALUE finish = ruby_time_from(ddprof_finish); VALUE profile_stats = build_profile_stats(args.slot, serialization_time_ns, heap_iteration_prep_time_ns, args.heap_profile_build_time_ns); return rb_ary_new_from_args(2, ok_symbol, rb_ary_new_from_args(4, start, finish, encoded_pprof, profile_stats)); } static VALUE ruby_time_from(ddog_Timespec ddprof_time) { const int utc = INT_MAX - 1; // From Ruby sources struct timespec time = {.tv_sec = ddprof_time.seconds, .tv_nsec = ddprof_time.nanoseconds}; return rb_time_timespec_new(&time, utc); } void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); locked_profile_slot active_slot = sampler_lock_active_profile(state); // Note: We initialize this array to have ALL_VALUE_TYPES_COUNT but only tell libdatadog to use the first // state->enabled_values_count values. This simplifies handling disabled value types -- we still put them on the // array, but in _native_initialize we arrange so their position starts from state->enabled_values_count and thus // libdatadog doesn't touch them. int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0}; uint8_t *position_for = state->position_for; metric_values[position_for[CPU_TIME_VALUE_ID]] = values.cpu_time_ns; metric_values[position_for[CPU_SAMPLES_VALUE_ID]] = values.cpu_or_wall_samples; metric_values[position_for[WALL_TIME_VALUE_ID]] = values.wall_time_ns; metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples; metric_values[position_for[TIMELINE_VALUE_ID]] = values.timeline_wall_time_ns; if (values.alloc_samples != 0) { // If we got an allocation sample end the heap allocation recording to commit the heap sample. // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can // be fixed with some refactoring but for now this leads to a less impactful change. 
end_heap_allocation_recording(state->heap_recorder, locations); } ddog_prof_Profile_Result result = ddog_prof_Profile_add( &active_slot.data->profile, (ddog_prof_Sample) { .locations = locations, .values = (ddog_Slice_I64) {.ptr = metric_values, .len = state->enabled_values_count}, .labels = labels.labels }, labels.end_timestamp_ns ); active_slot.data->stats.recorded_samples++; sampler_unlock_active_profile(active_slot); if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { rb_raise(rb_eArgError, "Failed to record sample: %"PRIsVALUE, get_error_details_and_drop(&result.err)); } } void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight, ddog_CharSlice *alloc_class) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can // be fixed with some refactoring but for now this leads to a less impactful change. start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight, alloc_class); } void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); locked_profile_slot active_slot = sampler_lock_active_profile(state); ddog_prof_Profile_Result result = ddog_prof_Profile_set_endpoint(&active_slot.data->profile, local_root_span_id, endpoint); sampler_unlock_active_profile(active_slot); if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { rb_raise(rb_eArgError, "Failed to record endpoint: %"PRIsVALUE, get_error_details_and_drop(&result.err)); } } #define MAX_LEN_HEAP_ITERATION_ERROR_MSG 256 // Heap recorder iteration context allows us access to stack recorder state and profile being serialized // during iteration of heap recorder live objects. 
typedef struct heap_recorder_iteration_context { struct stack_recorder_state *state; profile_slot *slot; bool error; char error_msg[MAX_LEN_HEAP_ITERATION_ERROR_MSG]; } heap_recorder_iteration_context; static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void *extra_arg) { heap_recorder_iteration_context *context = (heap_recorder_iteration_context*) extra_arg; live_object_data *object_data = &iteration_data.object_data; int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0}; uint8_t *position_for = context->state->position_for; metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = object_data->weight; metric_values[position_for[HEAP_SIZE_VALUE_ID]] = object_data->size * object_data->weight; ddog_prof_Label labels[2]; size_t label_offset = 0; if (object_data->class != NULL) { labels[label_offset++] = (ddog_prof_Label) { .key = DDOG_CHARSLICE_C("allocation class"), .str = (ddog_CharSlice) { .ptr = object_data->class, .len = strlen(object_data->class), }, .num = 0, // This shouldn't be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this }; } labels[label_offset++] = (ddog_prof_Label) { .key = DDOG_CHARSLICE_C("gc gen age"), .num = object_data->gen_age, }; ddog_prof_Profile_Result result = ddog_prof_Profile_add( &context->slot->profile, (ddog_prof_Sample) { .locations = iteration_data.locations, .values = (ddog_Slice_I64) {.ptr = metric_values, .len = context->state->enabled_values_count}, .labels = (ddog_prof_Slice_Label) { .ptr = labels, .len = label_offset, } }, 0 ); context->slot->stats.recorded_samples++; if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { read_ddogerr_string_and_drop(&result.err, context->error_msg, MAX_LEN_HEAP_ITERATION_ERROR_MSG); context->error = true; // By returning false we cancel the iteration return false; } // Keep on iterating to next item! return true; } static void build_heap_profile_without_gvl(struct stack_recorder_state *state, profile_slot *slot) { heap_recorder_iteration_context iteration_context = { .state = state, .slot = slot, .error = false, .error_msg = {0}, }; bool iterated = heap_recorder_for_each_live_object(state->heap_recorder, add_heap_sample_to_active_profile_without_gvl, (void*) &iteration_context); // We wait until we're out of the iteration to grab the gvl and raise. This is important because during // iteration we may potentially acquire locks in the heap recorder and we could reach a deadlock if the // same locks are acquired by the heap recorder while holding the gvl (since we'd be operating on the // same locks but acquiring them in different order). 
if (!iterated) { grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: iteration cancelled"); } else if (iteration_context.error) { grab_gvl_and_raise(rb_eRuntimeError, "Failure during heap profile building: %s", iteration_context.error_msg); } } static void *call_serialize_without_gvl(void *call_args) { struct call_serialize_without_gvl_arguments *args = (struct call_serialize_without_gvl_arguments *) call_args; long serialize_no_gvl_start_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE); profile_slot *slot_now_inactive = serializer_flip_active_and_inactive_slots(args->state); args->slot = slot_now_inactive; // Now that we have the inactive profile with all but heap samples, lets fill it with heap data // without needing to race with the active sampler build_heap_profile_without_gvl(args->state, args->slot); args->heap_profile_build_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - serialize_no_gvl_start_time_ns; // Note: The profile gets reset by the serialize call args->result = ddog_prof_Profile_serialize(&args->slot->profile, &args->finish_timestamp, NULL /* duration_nanos is optional */, NULL /* start_time is optional */); args->serialize_ran = true; args->serialize_no_gvl_time_ns = monotonic_wall_time_now_ns(DO_NOT_RAISE_ON_FAILURE) - serialize_no_gvl_start_time_ns; return NULL; // Unused } VALUE enforce_recorder_instance(VALUE object) { Check_TypedStruct(object, &stack_recorder_typed_data); return object; } static locked_profile_slot sampler_lock_active_profile(struct stack_recorder_state *state) { int error; for (int attempts = 0; attempts < 2; attempts++) { error = pthread_mutex_trylock(&state->mutex_slot_one); if (error && error != EBUSY) ENFORCE_SUCCESS_GVL(error); // Slot one is active if (!error) return (locked_profile_slot) {.mutex = &state->mutex_slot_one, .data = &state->profile_slot_one}; // If we got here, slot one was not active, let's try slot two error = pthread_mutex_trylock(&state->mutex_slot_two); if (error && error != EBUSY) ENFORCE_SUCCESS_GVL(error); // Slot two is active if (!error) return (locked_profile_slot) {.mutex = &state->mutex_slot_two, .data = &state->profile_slot_two}; } // We already tried both multiple times, and we did not succeed. This is not expected to happen. Let's stop sampling. rb_raise(rb_eRuntimeError, "Failed to grab either mutex in sampler_lock_active_profile"); } static void sampler_unlock_active_profile(locked_profile_slot active_slot) { ENFORCE_SUCCESS_GVL(pthread_mutex_unlock(active_slot.mutex)); } static profile_slot* serializer_flip_active_and_inactive_slots(struct stack_recorder_state *state) { int previously_active_slot = state->active_slot; if (previously_active_slot != 1 && previously_active_slot != 2) { grab_gvl_and_raise(rb_eRuntimeError, "Unexpected active_slot state %d in serializer_flip_active_and_inactive_slots", previously_active_slot); } pthread_mutex_t *previously_active = (previously_active_slot == 1) ? &state->mutex_slot_one : &state->mutex_slot_two; pthread_mutex_t *previously_inactive = (previously_active_slot == 1) ? &state->mutex_slot_two : &state->mutex_slot_one; // Release the lock, thus making this slot active ENFORCE_SUCCESS_NO_GVL(pthread_mutex_unlock(previously_inactive)); // Grab the lock, thus making this slot inactive ENFORCE_SUCCESS_NO_GVL(pthread_mutex_lock(previously_active)); // Update active_slot state->active_slot = (previously_active_slot == 1) ? 2 : 1; // Return pointer to previously active slot (now inactive) return (previously_active_slot == 1) ? 
&state->profile_slot_one : &state->profile_slot_two; } // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec. // It SHOULD NOT be used for other purposes. static VALUE _native_active_slot(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); return INT2NUM(state->active_slot); } // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec. // It SHOULD NOT be used for other purposes. static VALUE _native_is_slot_one_mutex_locked(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance) { return test_slot_mutex_state(recorder_instance, 1); } // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec. // It SHOULD NOT be used for other purposes. static VALUE _native_is_slot_two_mutex_locked(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance) { return test_slot_mutex_state(recorder_instance, 2); } static VALUE test_slot_mutex_state(VALUE recorder_instance, int slot) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); pthread_mutex_t *slot_mutex = (slot == 1) ? &state->mutex_slot_one : &state->mutex_slot_two; // Like Heisenberg's uncertainty principle, we can't observe without affecting... int error = pthread_mutex_trylock(slot_mutex); if (error == 0) { // Mutex was unlocked ENFORCE_SUCCESS_GVL(pthread_mutex_unlock(slot_mutex)); return Qfalse; } else if (error == EBUSY) { // Mutex was locked return Qtrue; } else { ENFORCE_SUCCESS_GVL(error); rb_raise(rb_eRuntimeError, "Failed to raise exception in test_slot_mutex_state; this should never happen"); } } static ddog_Timespec system_epoch_now_timespec(void) { long now_ns = system_epoch_time_now_ns(RAISE_ON_FAILURE); return (ddog_Timespec) {.seconds = now_ns / SECONDS_AS_NS(1), .nanoseconds = now_ns % SECONDS_AS_NS(1)}; } // After the Ruby VM forks, this method gets called in the child process to clean up any leftover state from the parent. // // Assumption: This method gets called BEFORE restarting profiling -- e.g. there are no components attempting to // trigger samples at the same time. static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_instance) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); // In case the fork happened halfway through `serializer_flip_active_and_inactive_slots` execution and the // resulting state is inconsistent, we make sure to reset it back to the initial state. initialize_slot_concurrency_control(state); reset_profile_slot(&state->profile_slot_one, /* start_time: */ NULL); reset_profile_slot(&state->profile_slot_two, /* start_time: */ NULL); heap_recorder_after_fork(state->heap_recorder); return Qtrue; } // Assumption 1: This method is called with the GVL being held, because `ddog_prof_Profile_reset` mutates the profile and must // not be interrupted part-way through by a VM fork. static void serializer_set_start_timestamp_for_next_profile(struct stack_recorder_state *state, ddog_Timespec start_time) { // Before making this profile active, we reset it so that it uses the correct start_time for its start profile_slot *next_profile_slot = (state->active_slot == 1) ? 
&state->profile_slot_two : &state->profile_slot_one; reset_profile_slot(next_profile_slot, &start_time); } static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint) { ENFORCE_TYPE(local_root_span_id, T_FIXNUM); record_endpoint(recorder_instance, NUM2ULL(local_root_span_id), char_slice_from_ruby_string(endpoint)); return Qtrue; } static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight, VALUE alloc_class) { ENFORCE_TYPE(weight, T_FIXNUM); ddog_CharSlice alloc_class_slice = char_slice_from_ruby_string(alloc_class); track_object(recorder_instance, new_obj, NUM2UINT(weight), &alloc_class_slice); return Qtrue; } static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locations) { ENFORCE_TYPE(locations, T_ARRAY); size_t locations_len = rb_array_len(locations); ddog_prof_Location locations_arr[locations_len]; for (size_t i = 0; i < locations_len; i++) { VALUE location = rb_ary_entry(locations, i); ENFORCE_TYPE(location, T_ARRAY); VALUE name = rb_ary_entry(location, 0); VALUE filename = rb_ary_entry(location, 1); VALUE line = rb_ary_entry(location, 2); ENFORCE_TYPE(name, T_STRING); ENFORCE_TYPE(filename, T_STRING); ENFORCE_TYPE(line, T_FIXNUM); locations_arr[i] = (ddog_prof_Location) { .line = line, .function = (ddog_prof_Function) { .name = char_slice_from_ruby_string(name), .filename = char_slice_from_ruby_string(filename), } }; } ddog_prof_Slice_Location ddog_locations = { .len = locations_len, .ptr = locations_arr, }; heap_recorder_testonly_assert_hash_matches(ddog_locations); return Qnil; } static void reset_profile_slot(profile_slot *slot, ddog_Timespec *start_time /* Can be null */) { ddog_prof_Profile_Result reset_result = ddog_prof_Profile_reset(&slot->profile, start_time); if (reset_result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { rb_raise(rb_eRuntimeError, "Failed to reset profile: %"PRIsVALUE, get_error_details_and_drop(&reset_result.err)); } slot->stats = (stats_slot) {}; } // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec. // It SHOULD NOT be used for other purposes. static VALUE _native_start_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); heap_recorder_prepare_iteration(state->heap_recorder); return Qnil; } // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec. // It SHOULD NOT be used for other purposes. static VALUE _native_end_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); heap_recorder_finish_iteration(state->heap_recorder); return Qnil; } // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec. // It SHOULD NOT be used for other purposes. static VALUE _native_debug_heap_recorder(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); return heap_recorder_testonly_debug(state->heap_recorder); } #pragma GCC diagnostic push // rb_gc_force_recycle was deprecated in latest versions of Ruby and is a noop. 
#pragma GCC diagnostic ignored "-Wdeprecated-declarations" #pragma GCC diagnostic ignored "-Wunused-parameter" // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec. // It SHOULD NOT be used for other purposes. static VALUE _native_gc_force_recycle(DDTRACE_UNUSED VALUE _self, VALUE obj) { #ifdef HAVE_WORKING_RB_GC_FORCE_RECYCLE rb_gc_force_recycle(obj); #endif return Qnil; } #pragma GCC diagnostic pop // This method exists only to enable testing Datadog::Profiling::StackRecorder behavior using RSpec. // It SHOULD NOT be used for other purposes. static VALUE _native_has_seen_id_flag(DDTRACE_UNUSED VALUE _self, VALUE obj) { #ifndef NO_SEEN_OBJ_ID_FLAG if (RB_FL_TEST(obj, RUBY_FL_SEEN_OBJ_ID)) { return Qtrue; } else { return Qfalse; } #else return Qfalse; #endif } static VALUE _native_stats(DDTRACE_UNUSED VALUE self, VALUE recorder_instance) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); uint64_t total_serializations = state->stats_lifetime.serialization_successes + state->stats_lifetime.serialization_failures; VALUE heap_recorder_snapshot = state->heap_recorder ? heap_recorder_state_snapshot(state->heap_recorder) : Qnil; VALUE stats_as_hash = rb_hash_new(); VALUE arguments[] = { ID2SYM(rb_intern("serialization_successes")), /* => */ ULL2NUM(state->stats_lifetime.serialization_successes), ID2SYM(rb_intern("serialization_failures")), /* => */ ULL2NUM(state->stats_lifetime.serialization_failures), ID2SYM(rb_intern("serialization_time_ns_min")), /* => */ RUBY_NUM_OR_NIL(state->stats_lifetime.serialization_time_ns_min, != INT64_MAX, LONG2NUM), ID2SYM(rb_intern("serialization_time_ns_max")), /* => */ RUBY_NUM_OR_NIL(state->stats_lifetime.serialization_time_ns_max, > 0, LONG2NUM), ID2SYM(rb_intern("serialization_time_ns_total")), /* => */ RUBY_NUM_OR_NIL(state->stats_lifetime.serialization_time_ns_total, > 0, LONG2NUM), ID2SYM(rb_intern("serialization_time_ns_avg")), /* => */ RUBY_AVG_OR_NIL(state->stats_lifetime.serialization_time_ns_total, total_serializations), ID2SYM(rb_intern("heap_recorder_snapshot")), /* => */ heap_recorder_snapshot, }; for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]); return stats_as_hash; } static VALUE build_profile_stats(profile_slot *slot, long serialization_time_ns, long heap_iteration_prep_time_ns, long heap_profile_build_time_ns) { VALUE stats_as_hash = rb_hash_new(); VALUE arguments[] = { ID2SYM(rb_intern("recorded_samples")), /* => */ ULL2NUM(slot->stats.recorded_samples), ID2SYM(rb_intern("serialization_time_ns")), /* => */ LONG2NUM(serialization_time_ns), ID2SYM(rb_intern("heap_iteration_prep_time_ns")), /* => */ LONG2NUM(heap_iteration_prep_time_ns), ID2SYM(rb_intern("heap_profile_build_time_ns")), /* => */ LONG2NUM(heap_profile_build_time_ns), }; for (long unsigned int i = 0; i < VALUE_COUNT(arguments); i += 2) rb_hash_aset(stats_as_hash, arguments[i], arguments[i+1]); return stats_as_hash; }