ext/stackprof/stackprof.c in stackprof-0.2.25 vs ext/stackprof/stackprof.c in stackprof-0.2.26
- old
+ new
@@ -100,11 +100,11 @@
VALUE interval;
VALUE out;
VALUE metadata;
int ignore_gc;
- VALUE *raw_samples;
+ uint64_t *raw_samples;
size_t raw_samples_len;
size_t raw_samples_capa;
size_t raw_sample_index;
struct timestamp_t last_sample_at;
@@ -118,10 +118,12 @@
size_t unrecorded_gc_samples;
size_t unrecorded_gc_marking_samples;
size_t unrecorded_gc_sweeping_samples;
st_table *frames;
+ timestamp_t gc_start_timestamp;
+
VALUE fake_frame_names[TOTAL_FAKE_FRAMES];
VALUE empty_string;
int buffer_count;
sample_time_t buffer_time;
@@ -131,11 +133,11 @@
pthread_t target_thread;
} _stackprof;
static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line;
static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines;
-static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
+static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_raw_lines, sym_metadata, sym_frames, sym_ignore_gc, sym_out;
static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping;
static VALUE sym_gc_samples, objtracer;
static VALUE gc_hook;
static VALUE rb_mStackProf;
@@ -372,27 +374,37 @@
if (_stackprof.raw && _stackprof.raw_samples_len) {
size_t len, n, o;
VALUE raw_sample_timestamps, raw_timestamp_deltas;
VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len);
+ VALUE raw_lines = rb_ary_new_capa(_stackprof.raw_samples_len);
for (n = 0; n < _stackprof.raw_samples_len; n++) {
len = (size_t)_stackprof.raw_samples[n];
rb_ary_push(raw_samples, SIZET2NUM(len));
+ rb_ary_push(raw_lines, SIZET2NUM(len));
- for (o = 0, n++; o < len; n++, o++)
- rb_ary_push(raw_samples, PTR2NUM(_stackprof.raw_samples[n]));
+ for (o = 0, n++; o < len; n++, o++) {
+ // Line is in the upper 16 bits
+ rb_ary_push(raw_lines, INT2NUM(_stackprof.raw_samples[n] >> 48));
+
+ VALUE frame = _stackprof.raw_samples[n] & ~((uint64_t)0xFFFF << 48);
+ rb_ary_push(raw_samples, PTR2NUM(frame));
+ }
+
rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
+ rb_ary_push(raw_lines, SIZET2NUM((size_t)_stackprof.raw_samples[n]));
}
free(_stackprof.raw_samples);
_stackprof.raw_samples = NULL;
_stackprof.raw_samples_len = 0;
_stackprof.raw_samples_capa = 0;
_stackprof.raw_sample_index = 0;
rb_hash_aset(results, sym_raw, raw_samples);
+ rb_hash_aset(results, sym_raw_lines, raw_lines);
raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len);
raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len);
for (n = 0; n < _stackprof.raw_sample_times_len; n++) {
@@ -518,11 +530,16 @@
* might be different, so we need to check the stack here. Stacks
* in the raw buffer are stored in the opposite direction of stacks
* in the frames buffer that came from Ruby. */
for (i = num-1, n = 0; i >= 0; i--, n++) {
VALUE frame = _stackprof.frames_buffer[i];
- if (_stackprof.raw_samples[_stackprof.raw_sample_index + 1 + n] != frame)
+ int line = _stackprof.lines_buffer[i];
+
+ // Encode the line into the upper 16 bits.
+ uint64_t key = ((uint64_t)line << 48) | (uint64_t)frame;
+
+ if (_stackprof.raw_samples[_stackprof.raw_sample_index + 1 + n] != key)
break;
}
if (i == -1) {
_stackprof.raw_samples[_stackprof.raw_samples_len-1] += 1;
found = 1;
@@ -536,11 +553,16 @@
* find the previously recorded stack size. */
_stackprof.raw_sample_index = _stackprof.raw_samples_len;
_stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)num;
for (i = num-1; i >= 0; i--) {
VALUE frame = _stackprof.frames_buffer[i];
- _stackprof.raw_samples[_stackprof.raw_samples_len++] = frame;
+ int line = _stackprof.lines_buffer[i];
+
+ // Encode the line into the upper 16 bits.
+ uint64_t key = ((uint64_t)line << 48) | (uint64_t)frame;
+
+ _stackprof.raw_samples[_stackprof.raw_samples_len++] = key;
}
_stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)1;
}
/* If there's no timestamp delta buffer, allocate one */
@@ -624,19 +646,19 @@
_stackprof.buffer_count = num;
_stackprof.buffer_time.timestamp_usec = start_timestamp;
_stackprof.buffer_time.delta_usec = timestamp_delta;
}
+// Postponed job
void
stackprof_record_gc_samples(void)
{
int64_t delta_to_first_unrecorded_gc_sample = 0;
uint64_t start_timestamp = 0;
size_t i;
if (_stackprof.raw) {
- struct timestamp_t t;
- capture_timestamp(&t);
+ struct timestamp_t t = _stackprof.gc_start_timestamp;
start_timestamp = timestamp_usec(&t);
// We don't know when the GC samples were actually marked, so let's
// assume that they were marked at a perfectly regular interval.
delta_to_first_unrecorded_gc_sample = delta_usec(&_stackprof.last_sample_at, &t) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval);
@@ -754,10 +776,14 @@
if (mode == sym_marking) {
_stackprof.unrecorded_gc_marking_samples++;
} else if (mode == sym_sweeping) {
_stackprof.unrecorded_gc_sweeping_samples++;
}
+ if(!_stackprof.unrecorded_gc_samples) {
+ // No GC samples are pending yet: record when this run of GC samples started
+ capture_timestamp(&_stackprof.gc_start_timestamp);
+ }
_stackprof.unrecorded_gc_samples++;
rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0);
} else {
if (stackprof_use_postponed_job) {
rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0);
@@ -810,15 +836,22 @@
rb_gc_mark(_stackprof.out);
if (_stackprof.frames)
st_foreach(_stackprof.frames, frame_mark_i, 0);
- for (int i = 0; i < _stackprof.buffer_count; i++) {
+ int i;
+ for (i = 0; i < _stackprof.buffer_count; i++) {
rb_gc_mark(_stackprof.frames_buffer[i]);
}
}
+static size_t
+stackprof_memsize(const void *data) /* size callback listed in stackprof_type; `data` is not used */
+{
+ return sizeof(_stackprof); /* reports only the fixed size of the _stackprof state object */
+}
+
static void
stackprof_atfork_prepare(void)
{
struct itimerval timer;
if (_stackprof.running) {
@@ -860,10 +893,19 @@
stackprof_at_exit(ruby_vm_t* vm)
{
ruby_vm_running = 0;
}
+static const rb_data_type_t stackprof_type = { /* typed-data descriptor passed to TypedData_Wrap_Struct when gc_hook is created */
+ "StackProf", /* type name string */
+ {
+ stackprof_gc_mark, /* mark function, same one the earlier Data_Wrap_Struct call passed */
+ NULL, /* no free function: the wrapped pointer is &_stackprof, not a heap allocation */
+ stackprof_memsize, /* size function; returns sizeof(_stackprof) */
+ }
+};
+
void
Init_stackprof(void)
{
size_t i;
/*
@@ -891,10 +933,11 @@
S(lines);
S(version);
S(mode);
S(interval);
S(raw);
+ S(raw_lines);
S(raw_sample_timestamps);
S(raw_timestamp_deltas);
S(out);
S(metadata);
S(ignore_gc);
@@ -906,11 +949,11 @@
#undef S
/* Need to run this to warm the symbol table before we call this during GC */
rb_gc_latest_gc_info(sym_state);
- gc_hook = Data_Wrap_Struct(rb_cObject, stackprof_gc_mark, NULL, &_stackprof);
rb_global_variable(&gc_hook);
+ gc_hook = TypedData_Wrap_Struct(rb_cObject, &stackprof_type, &_stackprof);
_stackprof.raw_samples = NULL;
_stackprof.raw_samples_len = 0;
_stackprof.raw_samples_capa = 0;
_stackprof.raw_sample_index = 0;