lib/opentelemetry/exporter/otlp/exporter.rb in opentelemetry-exporter-otlp-0.10.0 vs lib/opentelemetry/exporter/otlp/exporter.rb in opentelemetry-exporter-otlp-0.11.0
- old
+ new
@@ -29,16 +29,17 @@
KEEP_ALIVE_TIMEOUT = 30
RETRY_COUNT = 5
WRITE_TIMEOUT_SUPPORTED = Gem::Version.new(RUBY_VERSION) >= Gem::Version.new('2.6')
private_constant(:KEEP_ALIVE_TIMEOUT, :RETRY_COUNT, :WRITE_TIMEOUT_SUPPORTED)
- def initialize(endpoint: config_opt('OTEL_EXPORTER_OTLP_SPAN_ENDPOINT', 'OTEL_EXPORTER_OTLP_ENDPOINT', default: 'localhost:55681/v1/trace'), # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity
+ def initialize(endpoint: config_opt('OTEL_EXPORTER_OTLP_SPAN_ENDPOINT', 'OTEL_EXPORTER_OTLP_ENDPOINT', default: 'localhost:55681/v1/trace'), # rubocop:disable Metrics/AbcSize, Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
insecure: config_opt('OTEL_EXPORTER_OTLP_SPAN_INSECURE', 'OTEL_EXPORTER_OTLP_INSECURE', default: false),
certificate_file: config_opt('OTEL_EXPORTER_OTLP_SPAN_CERTIFICATE', 'OTEL_EXPORTER_OTLP_CERTIFICATE'),
headers: config_opt('OTEL_EXPORTER_OTLP_SPAN_HEADERS', 'OTEL_EXPORTER_OTLP_HEADERS'), # TODO: what format is expected here?
compression: config_opt('OTEL_EXPORTER_OTLP_SPAN_COMPRESSION', 'OTEL_EXPORTER_OTLP_COMPRESSION'),
- timeout: config_opt('OTEL_EXPORTER_OTLP_SPAN_TIMEOUT', 'OTEL_EXPORTER_OTLP_TIMEOUT', default: 10))
+ timeout: config_opt('OTEL_EXPORTER_OTLP_SPAN_TIMEOUT', 'OTEL_EXPORTER_OTLP_TIMEOUT', default: 10),
+ metrics_reporter: nil)
raise ArgumentError, "invalid url for OTLP::Exporter #{endpoint}" if invalid_url?("http://#{endpoint}")
raise ArgumentError, "unsupported compression key #{compression}" unless compression.nil? || compression == 'gzip'
raise ArgumentError, 'headers must be comma-separated k:v pairs or a Hash' unless valid_headers?(headers)
uri = URI "http://#{endpoint}"
@@ -52,10 +53,11 @@
when String then CSV.parse(headers, col_sep: ':', row_sep: ',').to_h
when Hash then headers
end
@timeout = timeout.to_f # TODO: use this as a default timeout when we implement timeouts in https://github.com/open-telemetry/opentelemetry-ruby/pull/341
@compression = compression
+ @metrics_reporter = metrics_reporter || OpenTelemetry::SDK::Trace::Export::MetricsReporter
@shutdown = false
end
# Called to export sampled {OpenTelemetry::SDK::Trace::SpanData} structs.
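
Two notes on the hunk above. When metrics_reporter: is nil, @metrics_reporter falls back to the SDK's no-op OpenTelemetry::SDK::Trace::Export::MetricsReporter, so the new instrumentation below can be called unconditionally. And for reference, the String branch of the headers handling turns the comma-separated k:v form into a Hash via CSV; a quick illustration with made-up header values:

    require 'csv'

    headers = 'x-api-key:abc123,x-tenant:acme' # hypothetical OTEL_EXPORTER_OTLP_HEADERS value
    CSV.parse(headers, col_sep: ':', row_sep: ',').to_h
    # => {"x-api-key"=>"abc123", "x-tenant"=>"acme"}
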
@@ -130,38 +132,38 @@
@http.open_timeout = remaining_timeout
@http.read_timeout = remaining_timeout
@http.write_timeout = remaining_timeout if WRITE_TIMEOUT_SUPPORTED
@http.start unless @http.started?
- response = @http.request(request)
+ response = measure_request_duration { @http.request(request) }
case response
when Net::HTTPOK
response.body # Read and discard body
SUCCESS
when Net::HTTPServiceUnavailable, Net::HTTPTooManyRequests
response.body # Read and discard body
- redo if backoff?(retry_after: response['Retry-After'], retry_count: retry_count += 1)
+ redo if backoff?(retry_after: response['Retry-After'], retry_count: retry_count += 1, reason: response.code)
FAILURE
when Net::HTTPRequestTimeOut, Net::HTTPGatewayTimeOut, Net::HTTPBadGateway
response.body # Read and discard body
- redo if backoff?(retry_count: retry_count += 1)
+ redo if backoff?(retry_count: retry_count += 1, reason: response.code)
FAILURE
when Net::HTTPBadRequest, Net::HTTPClientError, Net::HTTPServerError
# TODO: decode the body as a google.rpc.Status Protobuf-encoded message when https://github.com/open-telemetry/opentelemetry-collector/issues/1357 is fixed.
response.body # Read and discard body
FAILURE
when Net::HTTPRedirection
@http.finish
handle_redirect(response['location'])
- redo if backoff?(retry_after: 0, retry_count: retry_count += 1)
+ redo if backoff?(retry_after: 0, retry_count: retry_count += 1, reason: response.code)
else
@http.finish
FAILURE
end
rescue Net::OpenTimeout, Net::ReadTimeout
- retry if backoff?(retry_count: retry_count += 1)
+ retry if backoff?(retry_count: retry_count += 1, reason: 'timeout')
return FAILURE
end
ensure
# Reset timeouts to defaults for the next call.
@http.open_timeout = @timeout
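
Each retry path above now tags the backoff with a reason: the literal 'timeout' for Net::OpenTimeout/Net::ReadTimeout, or the HTTP status taken from Net::HTTPResponse#code, which is a String. A small illustration of where those label values come from (the hand-built response object is only for demonstration):

    require 'net/http'

    # Net::HTTPResponse#code returns the status as a String, so the 'reason'
    # label carries values such as '429', '503', '502', or the literal 'timeout'.
    response = Net::HTTPTooManyRequests.new('1.1', '429', 'Too Many Requests')
    response.code                  # => "429"
    { 'reason' => response.code }  # the labels Hash passed to the failure counter
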
@@ -175,13 +177,28 @@
def untraced
OpenTelemetry::Trace.with_span(OpenTelemetry::Trace::Span.new) { yield }
end
- def backoff?(retry_after: nil, retry_count:)
+ def measure_request_duration
+ start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+ begin
+ response = yield
+ ensure
+ stop = Process.clock_gettime(Process::CLOCK_MONOTONIC)
+ duration_ms = 1000.0 * (stop - start)
+ @metrics_reporter.record_value('otel.otlp_exporter.request_duration',
+ value: duration_ms,
+ labels: { 'status' => response&.code || 'unknown' })
+ end
+ end
+
+ def backoff?(retry_after: nil, retry_count:, reason:)
return false if retry_count > RETRY_COUNT
+ @metrics_reporter.add_to_counter('otel.otlp_exporter.failure', labels: { 'reason' => reason })
+
sleep_interval = nil
unless retry_after.nil?
sleep_interval =
begin
Integer(retry_after)
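
The new measure_request_duration helper above uses the monotonic clock, which is unaffected by wall-clock adjustments, and records from an ensure block so a duration is reported (with status 'unknown') even when the request raises. The same timing pattern in isolation:

    # Standalone sketch of the pattern above; sleep stands in for @http.request(request).
    start = Process.clock_gettime(Process::CLOCK_MONOTONIC)
    begin
      sleep(0.05)
    ensure
      duration_ms = 1000.0 * (Process.clock_gettime(Process::CLOCK_MONOTONIC) - start)
      puts format('request took %.1f ms', duration_ms)
    end
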
@@ -231,11 +248,11 @@
def as_otlp_span(span_data) # rubocop:disable Metrics/AbcSize, Metrics/MethodLength
Opentelemetry::Proto::Trace::V1::Span.new(
trace_id: span_data.trace_id,
span_id: span_data.span_id,
- trace_state: span_data.tracestate,
+ trace_state: span_data.tracestate.to_s,
parent_span_id: span_data.parent_span_id == OpenTelemetry::Trace::INVALID_SPAN_ID ? nil : span_data.parent_span_id,
name: span_data.name,
kind: as_otlp_span_kind(span_data.kind),
start_time_unix_nano: as_otlp_timestamp(span_data.start_timestamp),
end_time_unix_nano: as_otlp_timestamp(span_data.end_timestamp),
@@ -252,10 +269,10 @@
dropped_events_count: span_data.total_recorded_events - span_data.events&.size.to_i,
links: span_data.links&.map do |link|
Opentelemetry::Proto::Trace::V1::Span::Link.new(
trace_id: link.span_context.trace_id,
span_id: link.span_context.span_id,
- trace_state: link.span_context.tracestate,
+ trace_state: link.span_context.tracestate.to_s,
attributes: link.attributes&.map { |k, v| as_otlp_key_value(k, v) }
# TODO: track dropped_attributes_count in Span#trim_links
)
end,
dropped_links_count: span_data.total_recorded_links - span_data.links&.size.to_i,
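
The two .to_s calls above are needed because the protobuf trace_state field is a plain String, while span_data.tracestate and link.span_context.tracestate are now Tracestate objects in the API. A hedged sketch of the serialization, assuming opentelemetry-api is loaded and that Tracestate.from_string/to_s round-trip the W3C tracestate header form:

    # Assumption: from_string parses the W3C tracestate header and to_s renders
    # it back, which is what the proto string field receives.
    ts = OpenTelemetry::Trace::Tracestate.from_string('congo=t61rcWkgMzE,rojo=00f067aa0ba902b7')
    ts.to_s  # => "congo=t61rcWkgMzE,rojo=00f067aa0ba902b7"
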