ext/stackprof/stackprof.c in stackprof-0.2.17 vs ext/stackprof/stackprof.c in stackprof-0.2.18
- old
+ new
@@ -5,20 +5,23 @@
vim: noexpandtab shiftwidth=4 tabstop=8 softtabstop=4
**********************************************************************/
#include <ruby/ruby.h>
+#include <ruby/version.h>
#include <ruby/debug.h>
#include <ruby/st.h>
#include <ruby/io.h>
#include <ruby/intern.h>
#include <signal.h>
#include <sys/time.h>
+#include <time.h>
#include <pthread.h>
#define BUF_SIZE 2048
#define MICROSECONDS_IN_SECOND 1000000
+#define NANOSECONDS_IN_SECOND 1000000000
#define FAKE_FRAME_GC INT2FIX(0)
#define FAKE_FRAME_MARK INT2FIX(1)
#define FAKE_FRAME_SWEEP INT2FIX(2)
@@ -36,18 +39,64 @@
"(sweeping)",
};
#define TOTAL_FAKE_FRAMES (sizeof(fake_frame_cstrs) / sizeof(char *))
+#ifdef _POSIX_MONOTONIC_CLOCK
+ #define timestamp_t timespec
+ typedef struct timestamp_t timestamp_t;
+
+ static void capture_timestamp(timestamp_t *ts) {
+ clock_gettime(CLOCK_MONOTONIC, ts);
+ }
+
+ static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
+ int64_t result = MICROSECONDS_IN_SECOND * (end->tv_sec - start->tv_sec);
+ if (end->tv_nsec < start->tv_nsec) {
+ result -= MICROSECONDS_IN_SECOND;
+ result += (NANOSECONDS_IN_SECOND + end->tv_nsec - start->tv_nsec) / 1000;
+ } else {
+ result += (end->tv_nsec - start->tv_nsec) / 1000;
+ }
+ return result;
+ }
+
+ static uint64_t timestamp_usec(timestamp_t *ts) {
+ return (MICROSECONDS_IN_SECOND * ts->tv_sec) + (ts->tv_nsec / 1000);
+ }
+#else
+ #define timestamp_t timeval
+ typedef struct timestamp_t timestamp_t;
+
+ static void capture_timestamp(timestamp_t *ts) {
+ gettimeofday(ts, NULL);
+ }
+
+ static int64_t delta_usec(timestamp_t *start, timestamp_t *end) {
+ struct timeval diff;
+ timersub(end, start, &diff);
+ return (MICROSECONDS_IN_SECOND * diff.tv_sec) + diff.tv_usec;
+ }
+
+ static uint64_t timestamp_usec(timestamp_t *ts) {
+ return (MICROSECONDS_IN_SECOND * ts.tv_sec) + diff.tv_usec
+ }
+#endif
+
typedef struct {
size_t total_samples;
size_t caller_samples;
size_t seen_at_sample_number;
st_table *edges;
st_table *lines;
} frame_data_t;
+typedef struct {
+ uint64_t timestamp_usec;
+ int64_t delta_usec;
+} sample_time_t;
+
static struct {
int running;
int raw;
int aggregate;
@@ -60,14 +109,14 @@
VALUE *raw_samples;
size_t raw_samples_len;
size_t raw_samples_capa;
size_t raw_sample_index;
- struct timeval last_sample_at;
- int *raw_timestamp_deltas;
- size_t raw_timestamp_deltas_len;
- size_t raw_timestamp_deltas_capa;
+ struct timestamp_t last_sample_at;
+ sample_time_t *raw_sample_times;
+ size_t raw_sample_times_len;
+ size_t raw_sample_times_capa;
size_t overall_signals;
size_t overall_samples;
size_t during_gc;
size_t unrecorded_gc_samples;
@@ -75,18 +124,21 @@
size_t unrecorded_gc_sweeping_samples;
st_table *frames;
VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
VALUE empty_string;
+
+ int buffer_count;
+ sample_time_t buffer_time;
VALUE frames_buffer[BUF_SIZE];
int lines_buffer[BUF_SIZE];
} _stackprof;
static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
-static VALUE sym_aggregate, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
+static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
static VALUE sym_gc_samples, objtracer;
static VALUE gc_hook;
static VALUE rb_mStackProf;
static void stackprof_newobj_handler(VALUE, void*);
@@ -172,11 +224,11 @@
_stackprof.ignore_gc = ignore_gc;
_stackprof.metadata = metadata;
_stackprof.out = out;
if (raw) {
- gettimeofday(&_stackprof.last_sample_at, NULL);
+ capture_timestamp(&_stackprof.last_sample_at);
}
return Qtrue;
}
@@ -207,17 +259,23 @@
}
return Qtrue;
}
+#if SIZEOF_VOIDP == SIZEOF_LONG
+# define PTR2NUM(x) (LONG2NUM((long)(x)))
+#else
+# define PTR2NUM(x) (LL2NUM((LONG_LONG)(x)))
+#endif
+
static int
frame_edges_i(st_data_t key, st_data_t val, st_data_t arg)
{
VALUE edges = (VALUE)arg;
intptr_t weight = (intptr_t)val;
- rb_hash_aset(edges, rb_obj_id((VALUE)key), INT2FIX(weight));
+ rb_hash_aset(edges, PTR2NUM(key), INT2FIX(weight));
return ST_CONTINUE;
}
static int
frame_lines_i(st_data_t key, st_data_t val, st_data_t arg)
@@ -240,11 +298,11 @@
VALUE results = (VALUE)arg;
VALUE details = rb_hash_new();
VALUE name, file, edges, lines;
VALUE line;
- rb_hash_aset(results, rb_obj_id(frame), details);
+ rb_hash_aset(results, PTR2NUM(frame), details);
if (FIXNUM_P(frame)) {
name = _stackprof.fake_frame_names[FIX2INT(frame)];
file = _stackprof.empty_string;
line = INT2FIX(0);
@@ -312,19 +370,19 @@
st_free_table(_stackprof.frames);
_stackprof.frames = NULL;
if (_stackprof.raw && _stackprof.raw_samples_len) {
size_t len, n, o;
- VALUE raw_timestamp_deltas;
+ VALUE raw_sample_timestamps, raw_timestamp_deltas;
VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
for (n = 0; n < _stackprof.raw_samples_len; n++) {
len = (size_t)_stackprof.raw_samples[n];
rb_ary_push(raw_samples, SIZET2NUM(len));
for (o = 0, n++; o < len; n++, o++)
- rb_ary_push(raw_samples, rb_obj_id(_stackprof.raw_samples[n]));
+ rb_ary_push(raw_samples, PTR2NUM(_stackprof.raw_samples[n]));
rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
}
free(_stackprof.raw_samples);
_stackprof.raw_samples = NULL;
@@ -332,21 +390,24 @@
_stackprof.raw_samples_capa = 0;
_stackprof.raw_sample_index = 0;
rb_hash_aset(results, sym_raw, raw_samples);
- raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_timestamp_deltas_len);
+ raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len);
+ raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len);
- for (n = 0; n < _stackprof.raw_timestamp_deltas_len; n++) {
- rb_ary_push(raw_timestamp_deltas, INT2FIX(_stackprof.raw_timestamp_deltas[n]));
+ for (n = 0; n < _stackprof.raw_sample_times_len; n++) {
+ rb_ary_push(raw_sample_timestamps, ULL2NUM(_stackprof.raw_sample_times[n].timestamp_usec));
+ rb_ary_push(raw_timestamp_deltas, LL2NUM(_stackprof.raw_sample_times[n].delta_usec));
}
- free(_stackprof.raw_timestamp_deltas);
- _stackprof.raw_timestamp_deltas = NULL;
- _stackprof.raw_timestamp_deltas_len = 0;
- _stackprof.raw_timestamp_deltas_capa = 0;
+ free(_stackprof.raw_sample_times);
+ _stackprof.raw_sample_times = NULL;
+ _stackprof.raw_sample_times_len = 0;
+ _stackprof.raw_sample_times_capa = 0;
+ rb_hash_aset(results, sym_raw_sample_timestamps, raw_sample_timestamps);
rb_hash_aset(results, sym_raw_timestamp_deltas, raw_timestamp_deltas);
_stackprof.raw = 0;
}
@@ -422,18 +483,18 @@
{
st_update(table, key, numtable_increment_callback, (st_data_t)increment);
}
void
-stackprof_record_sample_for_stack(int num, int timestamp_delta)
+stackprof_record_sample_for_stack(int num, uint64_t sample_timestamp, int64_t timestamp_delta)
{
int i, n;
VALUE prev_frame = Qnil;
_stackprof.overall_samples++;
- if (_stackprof.raw) {
+ if (_stackprof.raw && num > 0) {
int found = 0;
/* If there's no sample buffer allocated, then allocate one. The buffer
* format is the number of frames (num), then the list of frames (from
* `_stackprof.raw_samples`), followed by the number of times this
@@ -481,24 +542,27 @@
}
_stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)1;
}
/* If there's no timestamp delta buffer, allocate one */
- if (!_stackprof.raw_timestamp_deltas) {
- _stackprof.raw_timestamp_deltas_capa = 100;
- _stackprof.raw_timestamp_deltas = malloc(sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
- _stackprof.raw_timestamp_deltas_len = 0;
+ if (!_stackprof.raw_sample_times) {
+ _stackprof.raw_sample_times_capa = 100;
+ _stackprof.raw_sample_times = malloc(sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
+ _stackprof.raw_sample_times_len = 0;
}
/* Double the buffer size if it's too small */
- while (_stackprof.raw_timestamp_deltas_capa <= _stackprof.raw_timestamp_deltas_len + 1) {
- _stackprof.raw_timestamp_deltas_capa *= 2;
- _stackprof.raw_timestamp_deltas = realloc(_stackprof.raw_timestamp_deltas, sizeof(int) * _stackprof.raw_timestamp_deltas_capa);
+ while (_stackprof.raw_sample_times_capa <= _stackprof.raw_sample_times_len + 1) {
+ _stackprof.raw_sample_times_capa *= 2;
+ _stackprof.raw_sample_times = realloc(_stackprof.raw_sample_times, sizeof(sample_time_t) * _stackprof.raw_sample_times_capa);
}
- /* Store the time delta (which is the amount of time between samples) */
- _stackprof.raw_timestamp_deltas[_stackprof.raw_timestamp_deltas_len++] = timestamp_delta;
+ /* Store the time delta (which is the amount of microseconds between samples). */
+ _stackprof.raw_sample_times[_stackprof.raw_sample_times_len++] = (sample_time_t) {
+ .timestamp_usec = sample_timestamp,
+ .delta_usec = timestamp_delta,
+ };
}
for (i = 0; i < num; i++) {
int line = _stackprof.lines_buffer[i];
VALUE frame = _stackprof.frames_buffer[i];
@@ -527,99 +591,137 @@
prev_frame = frame;
}
if (_stackprof.raw) {
- gettimeofday(&_stackprof.last_sample_at, NULL);
+ capture_timestamp(&_stackprof.last_sample_at);
}
}
+// buffer the current profile frames
+// This must be async-signal-safe
+// Returns immediately if another set of frames are already in the buffer
void
-stackprof_record_sample()
+stackprof_buffer_sample(void)
{
- int timestamp_delta = 0;
+ if (_stackprof.buffer_count > 0) {
+ // Another sample is already pending
+ return;
+ }
+
+ uint64_t start_timestamp = 0;
+ int64_t timestamp_delta = 0;
int num;
if (_stackprof.raw) {
- struct timeval t;
- struct timeval diff;
- gettimeofday(&t, NULL);
- timersub(&t, &_stackprof.last_sample_at, &diff);
- timestamp_delta = (1000 * diff.tv_sec) + diff.tv_usec;
+ struct timestamp_t t;
+ capture_timestamp(&t);
+ start_timestamp = timestamp_usec(&t);
+ timestamp_delta = delta_usec(&t, &_stackprof.last_sample_at);
}
+
num = rb_profile_frames(0, sizeof(_stackprof.frames_buffer) / sizeof(VALUE), _stackprof.frames_buffer, _stackprof.lines_buffer);
- stackprof_record_sample_for_stack(num, timestamp_delta);
+
+ _stackprof.buffer_count = num;
+ _stackprof.buffer_time.timestamp_usec = start_timestamp;
+ _stackprof.buffer_time.delta_usec = timestamp_delta;
}
void
-stackprof_record_gc_samples()
+stackprof_record_gc_samples(void)
{
- int delta_to_first_unrecorded_gc_sample = 0;
- int i;
+ int64_t delta_to_first_unrecorded_gc_sample = 0;
+ uint64_t start_timestamp = 0;
+ size_t i;
if (_stackprof.raw) {
- struct timeval t;
- struct timeval diff;
- gettimeofday(&t, NULL);
- timersub(&t, &_stackprof.last_sample_at, &diff);
+ struct timestamp_t t;
+ capture_timestamp(&t);
+ start_timestamp = timestamp_usec(&t);
// We don't know when the GC samples were actually marked, so let's
// assume that they were marked at a perfectly regular interval.
- delta_to_first_unrecorded_gc_sample = (1000 * diff.tv_sec + diff.tv_usec) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
+ delta_to_first_unrecorded_gc_sample = delta_usec(&t, &_stackprof.last_sample_at) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
if (delta_to_first_unrecorded_gc_sample < 0) {
delta_to_first_unrecorded_gc_sample = 0;
}
}
-
for (i = 0; i < _stackprof.unrecorded_gc_samples; i++) {
- int timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
+ int64_t timestamp_delta = i == 0 ? delta_to_first_unrecorded_gc_sample : NUM2LONG(_stackprof.interval);
if (_stackprof.unrecorded_gc_marking_samples) {
_stackprof.frames_buffer[0] = FAKE_FRAME_MARK;
_stackprof.lines_buffer[0] = 0;
_stackprof.frames_buffer[1] = FAKE_FRAME_GC;
_stackprof.lines_buffer[1] = 0;
_stackprof.unrecorded_gc_marking_samples--;
- stackprof_record_sample_for_stack(2, timestamp_delta);
+ stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
} else if (_stackprof.unrecorded_gc_sweeping_samples) {
_stackprof.frames_buffer[0] = FAKE_FRAME_SWEEP;
_stackprof.lines_buffer[0] = 0;
_stackprof.frames_buffer[1] = FAKE_FRAME_GC;
_stackprof.lines_buffer[1] = 0;
_stackprof.unrecorded_gc_sweeping_samples--;
- stackprof_record_sample_for_stack(2, timestamp_delta);
+ stackprof_record_sample_for_stack(2, start_timestamp, timestamp_delta);
} else {
_stackprof.frames_buffer[0] = FAKE_FRAME_GC;
_stackprof.lines_buffer[0] = 0;
- stackprof_record_sample_for_stack(1, timestamp_delta);
+ stackprof_record_sample_for_stack(1, start_timestamp, timestamp_delta);
}
}
_stackprof.during_gc += _stackprof.unrecorded_gc_samples;
_stackprof.unrecorded_gc_samples = 0;
_stackprof.unrecorded_gc_marking_samples = 0;
_stackprof.unrecorded_gc_sweeping_samples = 0;
}
+// record the sample previously buffered by stackprof_buffer_sample
static void
-stackprof_gc_job_handler(void *data)
+stackprof_record_buffer(void)
{
+ stackprof_record_sample_for_stack(_stackprof.buffer_count, _stackprof.buffer_time.timestamp_usec, _stackprof.buffer_time.delta_usec);
+
+ // reset the buffer
+ _stackprof.buffer_count = 0;
+}
+
+static void
+stackprof_sample_and_record(void)
+{
+ stackprof_buffer_sample();
+ stackprof_record_buffer();
+}
+
+static void
+stackprof_job_record_gc(void *data)
+{
if (!_stackprof.running) return;
stackprof_record_gc_samples();
}
+#ifdef USE_POSTPONED_JOB
static void
-stackprof_job_handler(void *data)
+stackprof_job_sample_and_record(void *data)
{
if (!_stackprof.running) return;
- stackprof_record_sample();
+ stackprof_sample_and_record();
}
+#endif
static void
+stackprof_job_record_buffer(void *data)
+{
+ if (!_stackprof.running) return;
+
+ stackprof_record_buffer();
+}
+
+static void
stackprof_signal_handler(int sig, siginfo_t *sinfo, void *ucontext)
{
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
_stackprof.overall_signals++;
@@ -634,16 +736,20 @@
_stackprof.unrecorded_gc_marking_samples++;
} else if (mode == sym_sweeping) {
_stackprof.unrecorded_gc_sweeping_samples++;
}
_stackprof.unrecorded_gc_samples++;
- rb_postponed_job_register_one(0, stackprof_gc_job_handler, (void*)0);
+ rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0);
} else {
#ifdef USE_POSTPONED_JOB
- rb_postponed_job_register_one(0, stackprof_job_handler, (void*)0);
+ rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0);
#else
- stackprof_job_handler(0);
+ // Buffer a sample immediately, if an existing sample exists this will
+ // return immediately
+ stackprof_buffer_sample();
+ // Enqueue a job to record the sample
+ rb_postponed_job_register_one(0, stackprof_job_record_buffer, (void*)0);
#endif
}
pthread_mutex_unlock(&lock);
}
@@ -651,21 +757,21 @@
stackprof_newobj_handler(VALUE tpval, void *data)
{
_stackprof.overall_signals++;
if (RTEST(_stackprof.interval) && _stackprof.overall_signals % NUM2LONG(_stackprof.interval))
return;
- stackprof_job_handler(0);
+ stackprof_sample_and_record();
}
static VALUE
stackprof_sample(VALUE self)
{
if (!_stackprof.running)
return Qfalse;
_stackprof.overall_signals++;
- stackprof_job_handler(0);
+ stackprof_sample_and_record();
return Qtrue;
}
static int
frame_mark_i(st_data_t key, st_data_t val, st_data_t arg)
@@ -740,10 +846,11 @@
S(lines);
S(version);
S(mode);
S(interval);
S(raw);
+ S(raw_sample_timestamps);
S(raw_timestamp_deltas);
S(out);
S(metadata);
S(ignore_gc);
S(frames);
@@ -762,12 +869,12 @@
_stackprof.raw_samples = NULL;
_stackprof.raw_samples_len = 0;
_stackprof.raw_samples_capa = 0;
_stackprof.raw_sample_index = 0;
- _stackprof.raw_timestamp_deltas = NULL;
- _stackprof.raw_timestamp_deltas_len = 0;
- _stackprof.raw_timestamp_deltas_capa = 0;
+ _stackprof.raw_sample_times = NULL;
+ _stackprof.raw_sample_times_len = 0;
+ _stackprof.raw_sample_times_capa = 0;
_stackprof.empty_string = rb_str_new_cstr("");
rb_global_variable(&_stackprof.empty_string);
for (i = 0; i < TOTAL_FAKE_FRAMES; i++) {