ext/stackprof/stackprof.c in stackprof-0.2.17 vs ext/stackprof/stackprof.c in stackprof-0.2.18

- old
+ new

@@ -5,20 +5,23 @@ vim: noexpandtab shiftwidth=4 tabstop=8 softtabstop=4 **********************************************************************/ #include <ruby/ruby.h> +#include <ruby/version.h> #include <ruby/debug.h> #include <ruby/st.h> #include <ruby/io.h> #include <ruby/intern.h> #include <signal.h> #include <sys/time.h> +#include <time.h> #include <pthread.h> #define BUF_SIZE 2048 #define MICROSECONDS_IN_SECOND 1000000 +#define NANOSECONDS_IN_SECOND 1000000000 #define FAKE_FRAME_GC INT2FIX(0) #define FAKE_FRAME_MARK INT2FIX(1) #define FAKE_FRAME_SWEEP INT2FIX(2) @@ -36,18 +39,64 @@ "(sweeping)", }; #define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *)) +#ifdef _POSIX_MONOTONIC_CLOCK + #define timestamp_t timespec + typedef struct timestamp_t timestamp_t; + + static void capture_timestamp(timestamp_t *ts) { + clock_gettime(CLOCK_MONOTONIC, ts); + } + + static int64_t delta_usec(timestamp_t *start, timestamp_t *end) { + int64_t result = MICROSECONDS_IN_SECOND * (end->tv_sec - start->tv_sec); + if (end->tv_nsec < start->tv_nsec) { + result -= MICROSECONDS_IN_SECOND; + result += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000; + } else { + result += (end->tv_nsec - start->tv_nsec) / 1000; + } + return result; + } + + static uint64_t timestamp_usec(timestamp_t *ts) { + return (MICROSECONDS_IN_SECOND * ts->tv_sec) + (ts->tv_nsec / 1000); + } +#else + #define timestamp_t timeval + typedef struct timestamp_t timestamp_t; + + static void capture_timestamp(timestamp_t *ts) { + gettimeofday(ts, NULL); + } + + static int64_t delta_usec(timestamp_t *start, timestamp_t *end) { + struct timeval diff; + timersub(end, start, &diff); + return (MICROSECONDS_IN_SECOND * diff.tv_sec) + diff.tv_usec; + } + + static uint64_t timestamp_usec(timestamp_t *ts) { + return (MICROSECONDS_IN_SECOND * ts.tv_sec) + diff.tv_usec + } +#endif + typedef struct { size_t total_samples; size_t caller_samples; size_t seen_at_sample_number; st_table *edges; st_table *lines; } frame_data_t; +typedef struct { + uint64_t timestamp_usec; + int64_t delta_usec; +} sample_time_t; + static struct { int running; int raw; int aggregate; @@ -60,14 +109,14 @@ VALUE *raw_samples; size_t raw_samples_len; size_t raw_samples_capa; size_t raw_sample_index; - struct timeval last_sample_at; - int *raw_timestamp_deltas; - size_t raw_timestamp_deltas_len; - size_t raw_timestamp_deltas_capa; + struct timestamp_t last_sample_at; + sample_time_t *raw_sample_times; + size_t raw_sample_times_len; + size_t raw_sample_times_capa; size_t overall_signals; size_t overall_samples; size_t during_gc; size_t unrecorded_gc_samples; @@ -75,18 +124,21 @@ size_t unrecorded_gc_sweeping_samples; st_table *frames; VALUE fake_frame_names[TOTAL_FAKE_FRAMES]; VALUE empty_string; + + int buffer_count; + sample_time_t buffer_time; VALUE frames_buffer[BUF_SIZE]; int lines_buffer[BUF_SIZE]; } _stackprof; static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line; static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines; static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out; -static VALUE sym_aggregate, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping; +static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping; static VALUE sym_gc_samples, objtracer; static VALUE gc_hook; static VALUE rb_mStackProf; static void stackprof_newobj_handler(VALUE, void*); @@ -172,11 +224,11 @@ _stackprof.ignore_gc = ignore_gc; _stackprof.metadata = metadata; _stackprof.out = out; if (raw) { - gettimeofday(&_stackprof.last_sample_at, NULL); + capture_timestamp(&_stackprof.last_sample_at); } return Qtrue; } @@ -207,17 +259,23 @@ } return Qtrue; } +#if SIZEOF_VOIDP == SIZEOF_LONG +# define PTR2NUM(x) (LONG2NUM((long)(x))) +#else +# define PTR2NUM(x) (LL2NUM((LONG_LONG)(x))) +#endif + static int frame_edges_i(st_data_t key, st_data_t val, st_data_t arg) { VALUE edges = (VALUE)arg; intptr_t weight = (intptr_t)val; - rb_hash_aset(edges, rb_obj_id((VALUE)key), INT2FIX(weight)); + rb_hash_aset(edges, PTR2NUM(key), INT2FIX(weight)); return ST_CONTINUE; } static int frame_lines_i(st_data_t key, st_data_t val, st_data_t arg) @@ -240,11 +298,11 @@ VALUE results = (VALUE)arg; VALUE details = rb_hash_new(); VALUE name, file, edges, lines; VALUE line; - rb_hash_aset(results, rb_obj_id(frame), details); + rb_hash_aset(results, PTR2NUM(frame), details); if (FIXNUM_P(frame)) { name = _stackprof.fake_frame_names[FIX2INT(frame)]; file = _stackprof.empty_string; line = INT2FIX(0); @@ -312,19 +370,19 @@ st_free_table(_stackprof.frames); _stackprof.frames = NULL; if (_stackprof.raw && _stackprof.raw_samples_len) { size_t len, n, o; - VALUE raw_timestamp_deltas; + VALUE raw_sample_timestamps, raw_timestamp_deltas; VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len); for (n = 0; n < _stackprof.raw_samples_len; n++) { len = (size_t)_stackprof.raw_samples[n]; rb_ary_push(raw_samples, SIZET2NUM(len)); for (o = 0, n++; o < len; n++, o++) - rb_ary_push(raw_samples, rb_obj_id(_stackprof.raw_samples[n])); + rb_ary_push(raw_samples, PTR2NUM(_stackprof.raw_samples[n])); rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n])); } free(_stackprof.raw_samples); _stackprof.raw_samples = NULL; @@ -332,21 +390,24 @@ _stackprof.raw_samples_capa = 0; _stackprof.raw_sample_index = 0; rb_hash_aset(results, sym_raw, raw_samples); - raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_timestamp_deltas_len); + raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len); + raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len); - for (n = 0; n < _stackprof.raw_timestamp_deltas_len; n++) { - rb_ary_push(raw_timestamp_deltas, INT2FIX(_stackprof.raw_timestamp_deltas[n])); + for (n = 0; n < _stackprof.raw_sample_times_len; n++) { + rb_ary_push(raw_sample_timestamps, ULL2NUM(_stackprof.raw_sample_times[n].timestamp_usec)); + rb_ary_push(raw_timestamp_deltas, LL2NUM(_stackprof.raw_sample_times[n].delta_usec)); } - free(_stackprof.raw_timestamp_deltas); - _stackprof.raw_timestamp_deltas = NULL; - _stackprof.raw_timestamp_deltas_len = 0; - _stackprof.raw_timestamp_deltas_capa = 0; + free(_stackprof.raw_sample_times); + _stackprof.raw_sample_times = NULL; + _stackprof.raw_sample_times_len = 0; + _stackprof.raw_sample_times_capa = 0; + rb_hash_aset(results, sym_raw_sample_timestamps, raw_sample_timestamps); rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas); _stackprof.raw = 0; } @@ -422,18 +483,18 @@ { st_update(table, key, numtable_increment_callback, (st_data_t)increment); } void -stackprof_record_sample_for_stack(int num, int timestamp_delta) +stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta) { int i, n; VALUE prev_frame = Qnil; _stackprof.overall_samples++; - if (_stackprof.raw) { + if (_stackprof.raw && num > 0) { int found = 0; /* If there's no sample buffer allocated, then allocate one. The buffer * format is the number of frames (num), then the list of frames (from * `_stackprof.raw_samples`), followed by the number of times this @@ -481,24 +542,27 @@ } _stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)1; } /* If there's no timestamp delta buffer, allocate one */ - if (!_stackprof.raw_timestamp_deltas) { - _stackprof.raw_timestamp_deltas_capa = 100; - _stackprof.raw_timestamp_deltas = malloc(sizeof(int) * _stackprof.raw_timestamp_deltas_capa); - _stackprof.raw_timestamp_deltas_len = 0; + if (!_stackprof.raw_sample_times) { + _stackprof.raw_sample_times_capa = 100; + _stackprof.raw_sample_times = malloc(sizeof(sample_time_t) * _stackprof.raw_sample_times_capa); + _stackprof.raw_sample_times_len = 0; } /* Double the buffer size if it's too small */ - while (_stackprof.raw_timestamp_deltas_capa <= _stackprof.raw_timestamp_deltas_len + 1) { - _stackprof.raw_timestamp_deltas_capa *= 2; - _stackprof.raw_timestamp_deltas = realloc(_stackprof.raw_timestamp_deltas, sizeof(int) * _stackprof.raw_timestamp_deltas_capa); + while (_stackprof.raw_sample_times_capa <= _stackprof.raw_sample_times_len + 1) { + _stackprof.raw_sample_times_capa *= 2; + _stackprof.raw_sample_times = realloc(_stackprof.raw_sample_times, sizeof(sample_time_t) * _stackprof.raw_sample_times_capa); } - /* Store the time delta (which is the amount of time between samples) */ - _stackprof.raw_timestamp_deltas[_stackprof.raw_timestamp_deltas_len++] = timestamp_delta; + /* Store the time delta (which is the amount of microseconds between samples). */ + _stackprof.raw_sample_times[_stackprof.raw_sample_times_len++] = (sample_time_t) { + .timestamp_usec = sample_timestamp, + .delta_usec = timestamp_delta, + }; } for (i = 0; i < num; i++) { int line = _stackprof.lines_buffer[i]; VALUE frame = _stackprof.frames_buffer[i]; @@ -527,99 +591,137 @@ prev_frame = frame; } if (_stackprof.raw) { - gettimeofday(&_stackprof.last_sample_at, NULL); + capture_timestamp(&_stackprof.last_sample_at); } } +// buffer the current profile frames +// This must be async-signal-safe +// Returns immediately if another set of frames are already in the buffer void -stackprof_record_sample() +stackprof_buffer_sample(void) { - int timestamp_delta = 0; + if (_stackprof.buffer_count > 0) { + // Another sample is already pending + return; + } + + uint64_t start_timestamp = 0; + int64_t timestamp_delta = 0; int num; if (_stackprof.raw) { - struct timeval t; - struct timeval diff; - gettimeofday(&t, NULL); - timersub(&t, &_stackprof.last_sample_at, &diff); - timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec; + struct timestamp_t t; + capture_timestamp(&t); + start_timestamp = timestamp_usec(&t); + timestamp_delta = delta_usec(&t, &_stackprof.last_sample_at); } + num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer); - stackprof_record_sample_for_stack(num, timestamp_delta); + + _stackprof.buffer_count = num; + _stackprof.buffer_time.timestamp_usec = start_timestamp; + _stackprof.buffer_time.delta_usec = timestamp_delta; } void -stackprof_record_gc_samples() +stackprof_record_gc_samples(void) { - int delta_to_first_unrecorded_gc_sample = 0; - int i; + int64_t delta_to_first_unrecorded_gc_sample = 0; + uint64_t start_timestamp = 0; + size_t i; if (_stackprof.raw) { - struct timeval t; - struct timeval diff; - gettimeofday(&t, NULL); - timersub(&t, &_stackprof.last_sample_at, &diff); + struct timestamp_t t; + capture_timestamp(&t); + start_timestamp = timestamp_usec(&t); // We don't know when the GC samples were actually marked, so let's // assume that they were marked at a perfectly regular interval. - delta_to_first_unrecorded_gc_sample = (1000 * diff.tv_sec + diff.tv_usec) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval); + delta_to_first_unrecorded_gc_sample = delta_usec(&t, &_stackprof.last_sample_at) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval); if (delta_to_first_unrecorded_gc_sample < 0) { delta_to_first_unrecorded_gc_sample = 0; } } - for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) { - int timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval); + int64_t timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval); if (_stackprof.unrecorded_gc_marking_samples) { _stackprof.frames_buffer[0] = FAKE_FRAME_MARK; _stackprof.lines_buffer[0] = 0; _stackprof.frames_buffer[1] = FAKE_FRAME_GC; _stackprof.lines_buffer[1] = 0; _stackprof.unrecorded_gc_marking_samples--; - stackprof_record_sample_for_stack(2, timestamp_delta); + stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta); } else if (_stackprof.unrecorded_gc_sweeping_samples) { _stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP; _stackprof.lines_buffer[0] = 0; _stackprof.frames_buffer[1] = FAKE_FRAME_GC; _stackprof.lines_buffer[1] = 0; _stackprof.unrecorded_gc_sweeping_samples--; - stackprof_record_sample_for_stack(2, timestamp_delta); + stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta); } else { _stackprof.frames_buffer[0] = FAKE_FRAME_GC; _stackprof.lines_buffer[0] = 0; - stackprof_record_sample_for_stack(1, timestamp_delta); + stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta); } } _stackprof.during_gc += _stackprof.unrecorded_gc_samples; _stackprof.unrecorded_gc_samples = 0; _stackprof.unrecorded_gc_marking_samples = 0; _stackprof.unrecorded_gc_sweeping_samples = 0; } +// record the sample previously buffered by stackprof_buffer_sample static void -stackprof_gc_job_handler(void *data) +stackprof_record_buffer(void) { + stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec); + + // reset the buffer + _stackprof.buffer_count = 0; +} + +static void +stackprof_sample_and_record(void) +{ + stackprof_buffer_sample(); + stackprof_record_buffer(); +} + +static void +stackprof_job_record_gc(void *data) +{ if (!_stackprof.running) return; stackprof_record_gc_samples(); } +#ifdef USE_POSTPONED_JOB static void -stackprof_job_handler(void *data) +stackprof_job_sample_and_record(void *data) { if (!_stackprof.running) return; - stackprof_record_sample(); + stackprof_sample_and_record(); } +#endif static void +stackprof_job_record_buffer(void *data) +{ + if (!_stackprof.running) return; + + stackprof_record_buffer(); +} + +static void stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext) { static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; _stackprof.overall_signals++; @@ -634,16 +736,20 @@ _stackprof.unrecorded_gc_marking_samples++; } else if (mode == sym_sweeping) { _stackprof.unrecorded_gc_sweeping_samples++; } _stackprof.unrecorded_gc_samples++; - rb_postponed_job_register_one(0, stackprof_gc_job_handler, (void*)0); + rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0); } else { #ifdef USE_POSTPONED_JOB - rb_postponed_job_register_one(0, stackprof_job_handler, (void*)0); + rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0); #else - stackprof_job_handler(0); + // Buffer a sample immediately, if an existing sample exists this will + // return immediately + stackprof_buffer_sample(); + // Enqueue a job to record the sample + rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0); #endif } pthread_mutex_unlock(&lock); } @@ -651,21 +757,21 @@ stackprof_newobj_handler(VALUE tpval, void *data) { _stackprof.overall_signals++; if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval)) return; - stackprof_job_handler(0); + stackprof_sample_and_record(); } static VALUE stackprof_sample(VALUE self) { if (!_stackprof.running) return Qfalse; _stackprof.overall_signals++; - stackprof_job_handler(0); + stackprof_sample_and_record(); return Qtrue; } static int frame_mark_i(st_data_t key, st_data_t val, st_data_t arg) @@ -740,10 +846,11 @@ S(lines); S(version); S(mode); S(interval); S(raw); + S(raw_sample_timestamps); S(raw_timestamp_deltas); S(out); S(metadata); S(ignore_gc); S(frames); @@ -762,12 +869,12 @@ _stackprof.raw_samples = NULL; _stackprof.raw_samples_len = 0; _stackprof.raw_samples_capa = 0; _stackprof.raw_sample_index = 0; - _stackprof.raw_timestamp_deltas = NULL; - _stackprof.raw_timestamp_deltas_len = 0; - _stackprof.raw_timestamp_deltas_capa = 0; + _stackprof.raw_sample_times = NULL; + _stackprof.raw_sample_times_len = 0; + _stackprof.raw_sample_times_capa = 0; _stackprof.empty_string = rb_str_new_cstr(""); rb_global_variable(&_stackprof.empty_string); for (i = 0; i < TOTAL_FAKE_FRAMES; i++) {