lib/opentelemetry/exporter/otlp/exporter.rb in opentelemetry-exporter-otlp-0.10.0 vs lib/opentelemetry/exporter/otlp/exporter.rb in opentelemetry-exporter-otlp-0.11.0

- old
+ new

@@ -29,16 +29,17 @@
   KEEP_ALIVE_TIMEOUT = 30
   RETRY_COUNT = 5
   WRITE_TIMEOUT_SUPPORTED = Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.6')
   private_constant(:KEEP_ALIVE_TIMEOUT, :RETRY_COUNT, :WRITE_TIMEOUT_SUPPORTED)

-  def initialize(endpoint: config_opt('OTEL_EXPORTER_OTLP_SPAN_ENDPOINT', 'OTEL_EXPORTER_OTLP_ENDPOINT', default: 'localhost:55681/v1/trace'), # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
+  def initialize(endpoint: config_opt('OTEL_EXPORTER_OTLP_SPAN_ENDPOINT', 'OTEL_EXPORTER_OTLP_ENDPOINT', default: 'localhost:55681/v1/trace'), # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
                  insecure: config_opt('OTEL_EXPORTER_OTLP_SPAN_INSECURE', 'OTEL_EXPORTER_OTLP_INSECURE', default: false),
                  certificate_file: config_opt('OTEL_EXPORTER_OTLP_SPAN_CERTIFICATE', 'OTEL_EXPORTER_OTLP_CERTIFICATE'),
                  headers: config_opt('OTEL_EXPORTER_OTLP_SPAN_HEADERS', 'OTEL_EXPORTER_OTLP_HEADERS'), # TODO: what format is expected here?
                  compression: config_opt('OTEL_EXPORTER_OTLP_SPAN_COMPRESSION', 'OTEL_EXPORTER_OTLP_COMPRESSION'),
-                 timeout: config_opt('OTEL_EXPORTER_OTLP_SPAN_TIMEOUT', 'OTEL_EXPORTER_OTLP_TIMEOUT', default: 10))
+                 timeout: config_opt('OTEL_EXPORTER_OTLP_SPAN_TIMEOUT', 'OTEL_EXPORTER_OTLP_TIMEOUT', default: 10),
+                 metrics_reporter: nil)
     raise ArgumentError, "invalid url for OTLP::Exporter #{endpoint}" if invalid_url?("http://#{endpoint}")
     raise ArgumentError, "unsupported compression key #{compression}" unless compression.nil? || compression == 'gzip'
     raise ArgumentError, 'headers must be comma-separated k:v pairs or a Hash' unless valid_headers?(headers)

     uri = URI "http://#{endpoint}"

@@ -52,10 +53,11 @@
                when String then CSV.parse(headers, col_sep: ':', row_sep: ',').to_h
                when Hash then headers
                end
     @timeout = timeout.to_f # TODO: use this as a default timeout when we implement timeouts in https://github.com/open-telemetry/opentelemetry-ruby/pull/341
     @compression = compression
+    @metrics_reporter = metrics_reporter || OpenTelemetry::SDK::Trace::Export::MetricsReporter
     @shutdown = false
   end

   # Called to export sampled {OpenTelemetry::SDK::Trace::SpanData} structs.

@@ -130,38 +132,38 @@
       @http.open_timeout = remaining_timeout
       @http.read_timeout = remaining_timeout
       @http.write_timeout = remaining_timeout if WRITE_TIMEOUT_SUPPORTED
       @http.start unless @http.started?
-      response = @http.request(request)
+      response = measure_request_duration { @http.request(request) }

       case response
       when Net::HTTPOK
         response.body # Read and discard body
         SUCCESS
       when Net::HTTPServiceUnavailable, Net::HTTPTooManyRequests
         response.body # Read and discard body
-        redo if backoff?(retry_after: response['Retry-After'], retry_count: retry_count += 1)
+        redo if backoff?(retry_after: response['Retry-After'], retry_count: retry_count += 1, reason: response.code)
         FAILURE
       when Net::HTTPRequestTimeOut, Net::HTTPGatewayTimeOut, Net::HTTPBadGateway
         response.body # Read and discard body
-        redo if backoff?(retry_count: retry_count += 1)
+        redo if backoff?(retry_count: retry_count += 1, reason: response.code)
         FAILURE
       when Net::HTTPBadRequest, Net::HTTPClientError, Net::HTTPServerError
         # TODO: decode the body as a google.rpc.Status Protobuf-encoded message when https://github.com/open-telemetry/opentelemetry-collector/issues/1357 is fixed.
         response.body # Read and discard body
         FAILURE
       when Net::HTTPRedirection
         @http.finish
         handle_redirect(response['location'])
-        redo if backoff?(retry_after: 0, retry_count: retry_count += 1)
+        redo if backoff?(retry_after: 0, retry_count: retry_count += 1, reason: response.code)
       else
         @http.finish
         FAILURE
       end
     rescue Net::OpenTimeout, Net::ReadTimeout
-      retry if backoff?(retry_count: retry_count += 1)
+      retry if backoff?(retry_count: retry_count += 1, reason: 'timeout')
       return FAILURE
     end
   ensure
     # Reset timeouts to defaults for the next call.
     @http.open_timeout = @timeout

@@ -175,13 +177,28 @@
   def untraced
     OpenTelemetry::Trace.with_span(OpenTelemetry::Trace::Span.new) { yield }
   end

-  def backoff?(retry_after: nil, retry_count:)
+  def measure_request_duration
+    start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+    begin
+      response = yield
+    ensure
+      stop = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+      duration_ms = 1000.0 * (stop - start)
+      @metrics_reporter.record_value('otel.otlp_exporter.request_duration',
+                                     value: duration_ms,
+                                     labels: { 'status' => response&.code || 'unknown' })
+    end
+  end
+
+  def backoff?(retry_after: nil, retry_count:, reason:)
     return false if retry_count > RETRY_COUNT

+    @metrics_reporter.add_to_counter('otel.otlp_exporter.failure', labels: { 'reason' => reason })
+
     sleep_interval = nil
     unless retry_after.nil?
       sleep_interval = begin
                          Integer(retry_after)

@@ -231,11 +248,11 @@
   def as_otlp_span(span_data) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
     Opentelemetry::Proto::Trace::V1::Span.new(
       trace_id: span_data.trace_id,
       span_id: span_data.span_id,
-      trace_state: span_data.tracestate,
+      trace_state: span_data.tracestate.to_s,
       parent_span_id: span_data.parent_span_id == OpenTelemetry::Trace::INVALID_SPAN_ID ? nil : span_data.parent_span_id,
       name: span_data.name,
       kind: as_otlp_span_kind(span_data.kind),
       start_time_unix_nano: as_otlp_timestamp(span_data.start_timestamp),
       end_time_unix_nano: as_otlp_timestamp(span_data.end_timestamp),

@@ -252,10 +269,10 @@
       dropped_events_count: span_data.total_recorded_events - span_data.events&.size.to_i,
       links: span_data.links&.map do |link|
         Opentelemetry::Proto::Trace::V1::Span::Link.new(
           trace_id: link.span_context.trace_id,
           span_id: link.span_context.span_id,
-          trace_state: link.span_context.tracestate,
+          trace_state: link.span_context.tracestate.to_s,
           attributes: link.attributes&.map { |k, v| as_otlp_key_value(k, v) } # TODO: track dropped_attributes_count in Span#trim_links
         )
       end,
       dropped_links_count: span_data.total_recorded_links - span_data.links&.size.to_i,
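The headline change in 0.11.0 is the optional metrics_reporter: keyword. When it is nil, the exporter falls back to OpenTelemetry::SDK::Trace::Export::MetricsReporter; the reporter is called from measure_request_duration (request duration in milliseconds, labelled with the HTTP status code or 'unknown') and from backoff? (a failure counter labelled with the retry reason). Based only on the calls visible in this diff, a custom reporter needs to respond to record_value and add_to_counter. The sketch below is illustrative, not part of the gem: the LoggingMetricsReporter class, its keyword defaults, and the require paths are assumptions.

require 'logger'
require 'opentelemetry/sdk'
require 'opentelemetry/exporter/otlp' # require path assumed; adjust to your setup

# Hypothetical reporter that just logs the two metrics this exporter emits.
# It only needs to respond to the messages the diff shows being sent.
class LoggingMetricsReporter
  def initialize(logger: Logger.new($stdout))
    @logger = logger
  end

  # Called from backoff? as add_to_counter('otel.otlp_exporter.failure', labels: { 'reason' => ... }).
  # The increment: keyword and its default are assumptions, not taken from the diff.
  def add_to_counter(metric, increment: 1, labels: {})
    @logger.info("#{metric} += #{increment} #{labels.inspect}")
  end

  # Called from measure_request_duration with the duration in milliseconds and
  # a 'status' label carrying the HTTP response code (or 'unknown').
  def record_value(metric, value:, labels: {})
    @logger.info("#{metric} = #{value} #{labels.inspect}")
  end
end

exporter = OpenTelemetry::Exporter::OTLP::Exporter.new(
  endpoint: 'localhost:55681/v1/trace', # the default shown in the diff
  metrics_reporter: LoggingMetricsReporter.new
)

Omitting metrics_reporter: selects the SDK's MetricsReporter default, so existing callers need no changes.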
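Separately, as_otlp_span now serializes span and link tracestate with to_s before assigning it to the protobuf trace_state field, which is a plain string in the OTLP proto (the W3C tracestate header value). A minimal sketch of what to_s produces, assuming the Tracestate API (.from_string, #to_s) from the matching opentelemetry-api release; neither call is shown in this diff.

tracestate = OpenTelemetry::Trace::Tracestate.from_string('rojo=00f067aa0ba902b7,congo=t61rcWkgMzE')

tracestate.to_s # => "rojo=00f067aa0ba902b7,congo=t61rcWkgMzE"

# 0.11.0 assigns this serialized header string to the proto trace_state field
# rather than the tracestate value itself.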