ext/stackprof/stackprof.c in stackprof-0.2.25 vs ext/stackprof/stackprof.c in stackprof-0.2.26

- old
+ new

@@ -100,11 +100,11 @@ VALUE interval; VALUE out; VALUE metadata; int ignore_gc; - VALUE *raw_samples; + uint64_t *raw_samples; size_t raw_samples_len; size_t raw_samples_capa; size_t raw_sample_index; struct timestamp_t last_sample_at; @@ -118,10 +118,12 @@ size_t unrecorded_gc_samples; size_t unrecorded_gc_marking_samples; size_t unrecorded_gc_sweeping_samples; st_table *frames; + timestamp_t gc_start_timestamp; + VALUE fake_frame_names[TOTAL_FAKE_FRAMES]; VALUE empty_string; int buffer_count; sample_time_t buffer_time; @@ -131,11 +133,11 @@ pthread_t target_thread; } _stackprof; static VALUE sym_object, sym_wall, sym_cpu, sym_custom, sym_name, sym_file, sym_line; static VALUE sym_samples, sym_total_samples, sym_missed_samples, sym_edges, sym_lines; -static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_metadata, sym_frames, sym_ignore_gc, sym_out; +static VALUE sym_version, sym_mode, sym_interval, sym_raw, sym_raw_lines, sym_metadata, sym_frames, sym_ignore_gc, sym_out; static VALUE sym_aggregate, sym_raw_sample_timestamps, sym_raw_timestamp_deltas, sym_state, sym_marking, sym_sweeping; static VALUE sym_gc_samples, objtracer; static VALUE gc_hook; static VALUE rb_mStackProf; @@ -372,27 +374,37 @@ if (_stackprof.raw && _stackprof.raw_samples_len) { size_t len, n, o; VALUE raw_sample_timestamps, raw_timestamp_deltas; VALUE raw_samples = rb_ary_new_capa(_stackprof.raw_samples_len); + VALUE raw_lines = rb_ary_new_capa(_stackprof.raw_samples_len); for (n = 0; n < _stackprof.raw_samples_len; n++) { len = (size_t)_stackprof.raw_samples[n]; rb_ary_push(raw_samples, SIZET2NUM(len)); + rb_ary_push(raw_lines, SIZET2NUM(len)); - for (o = 0, n++; o < len; n++, o++) - rb_ary_push(raw_samples, PTR2NUM(_stackprof.raw_samples[n])); + for (o = 0, n++; o < len; n++, o++) { + // Line is in the upper 16 bits + rb_ary_push(raw_lines, INT2NUM(_stackprof.raw_samples[n] >> 48)); + + VALUE frame = _stackprof.raw_samples[n] & ~((uint64_t)0xFFFF << 48); + rb_ary_push(raw_samples, PTR2NUM(frame)); + } + rb_ary_push(raw_samples, SIZET2NUM((size_t)_stackprof.raw_samples[n])); + rb_ary_push(raw_lines, SIZET2NUM((size_t)_stackprof.raw_samples[n])); } free(_stackprof.raw_samples); _stackprof.raw_samples = NULL; _stackprof.raw_samples_len = 0; _stackprof.raw_samples_capa = 0; _stackprof.raw_sample_index = 0; rb_hash_aset(results, sym_raw, raw_samples); + rb_hash_aset(results, sym_raw_lines, raw_lines); raw_sample_timestamps = rb_ary_new_capa(_stackprof.raw_sample_times_len); raw_timestamp_deltas = rb_ary_new_capa(_stackprof.raw_sample_times_len); for (n = 0; n < _stackprof.raw_sample_times_len; n++) { @@ -518,11 +530,16 @@ * might be different, so we need to check the stack here. Stacks * in the raw buffer are stored in the opposite direction of stacks * in the frames buffer that came from Ruby. */ for (i = num-1, n = 0; i >= 0; i--, n++) { VALUE frame = _stackprof.frames_buffer[i]; - if (_stackprof.raw_samples[_stackprof.raw_sample_index + 1 + n] != frame) + int line = _stackprof.lines_buffer[i]; + + // Encode the line in to the upper 16 bits. + uint64_t key = ((uint64_t)line << 48) | (uint64_t)frame; + + if (_stackprof.raw_samples[_stackprof.raw_sample_index + 1 + n] != key) break; } if (i == -1) { _stackprof.raw_samples[_stackprof.raw_samples_len-1] += 1; found = 1; @@ -536,11 +553,16 @@ * find the previously recorded stack size. */ _stackprof.raw_sample_index = _stackprof.raw_samples_len; _stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)num; for (i = num-1; i >= 0; i--) { VALUE frame = _stackprof.frames_buffer[i]; - _stackprof.raw_samples[_stackprof.raw_samples_len++] = frame; + int line = _stackprof.lines_buffer[i]; + + // Encode the line in to the upper 16 bits. + uint64_t key = ((uint64_t)line << 48) | (uint64_t)frame; + + _stackprof.raw_samples[_stackprof.raw_samples_len++] = key; } _stackprof.raw_samples[_stackprof.raw_samples_len++] = (VALUE)1; } /* If there's no timestamp delta buffer, allocate one */ @@ -624,19 +646,19 @@ _stackprof.buffer_count = num; _stackprof.buffer_time.timestamp_usec = start_timestamp; _stackprof.buffer_time.delta_usec = timestamp_delta; } +// Postponed job void stackprof_record_gc_samples(void) { int64_t delta_to_first_unrecorded_gc_sample = 0; uint64_t start_timestamp = 0; size_t i; if (_stackprof.raw) { - struct timestamp_t t; - capture_timestamp(&t); + struct timestamp_t t = _stackprof.gc_start_timestamp; start_timestamp = timestamp_usec(&t); // We don't know when the GC samples were actually marked, so let's // assume that they were marked at a perfectly regular interval. delta_to_first_unrecorded_gc_sample = delta_usec(&_stackprof.last_sample_at, &t) - (_stackprof.unrecorded_gc_samples - 1) * NUM2LONG(_stackprof.interval); @@ -754,10 +776,14 @@ if (mode == sym_marking) { _stackprof.unrecorded_gc_marking_samples++; } else if (mode == sym_sweeping) { _stackprof.unrecorded_gc_sweeping_samples++; } + if(!_stackprof.unrecorded_gc_samples) { + // record start + capture_timestamp(&_stackprof.gc_start_timestamp); + } _stackprof.unrecorded_gc_samples++; rb_postponed_job_register_one(0, stackprof_job_record_gc, (void*)0); } else { if (stackprof_use_postponed_job) { rb_postponed_job_register_one(0, stackprof_job_sample_and_record, (void*)0); @@ -810,15 +836,22 @@ rb_gc_mark(_stackprof.out); if (_stackprof.frames) st_foreach(_stackprof.frames, frame_mark_i, 0); - for (int i = 0; i < _stackprof.buffer_count; i++) { + int i; + for (i = 0; i < _stackprof.buffer_count; i++) { rb_gc_mark(_stackprof.frames_buffer[i]); } } +static size_t +stackprof_memsize(const void *data) +{ + return sizeof(_stackprof); +} + static void stackprof_atfork_prepare(void) { struct itimerval timer; if (_stackprof.running) { @@ -860,10 +893,19 @@ stackprof_at_exit(ruby_vm_t* vm) { ruby_vm_running = 0; } +static const rb_data_type_t stackprof_type = { + "StackProf", + { + stackprof_gc_mark, + NULL, + stackprof_memsize, + } +}; + void Init_stackprof(void) { size_t i; /* @@ -891,10 +933,11 @@ S(lines); S(version); S(mode); S(interval); S(raw); + S(raw_lines); S(raw_sample_timestamps); S(raw_timestamp_deltas); S(out); S(metadata); S(ignore_gc); @@ -906,11 +949,11 @@ #undef S /* Need to run this to warm the symbol table before we call this during GC */ rb_gc_latest_gc_info(sym_state); - gc_hook = Data_Wrap_Struct(rb_cObject, stackprof_gc_mark, NULL, &_stackprof); rb_global_variable(&gc_hook); + gc_hook = TypedData_Wrap_Struct(rb_cObject, &stackprof_type, &_stackprof); _stackprof.raw_samples = NULL; _stackprof.raw_samples_len = 0; _stackprof.raw_samples_capa = 0; _stackprof.raw_sample_index = 0;