lib/new_relic/agent/agent.rb in newrelic_rpm-8.13.1 vs lib/new_relic/agent/agent.rb in newrelic_rpm-8.14.0
- old
+ new
@@ -35,14 +35,17 @@
require 'new_relic/agent/attribute_filter'
require 'new_relic/agent/adaptive_sampler'
require 'new_relic/agent/connect/request_builder'
require 'new_relic/agent/connect/response_handler'
-require 'new_relic/agent/agent/start_worker_thread'
-require 'new_relic/agent/agent/startup'
-require 'new_relic/agent/agent/special_startup'
-require 'new_relic/agent/agent/shutdown'
+require 'new_relic/agent/agent_helpers/connect'
+require 'new_relic/agent/agent_helpers/harvest'
+require 'new_relic/agent/agent_helpers/start_worker_thread'
+require 'new_relic/agent/agent_helpers/startup'
+require 'new_relic/agent/agent_helpers/special_startup'
+require 'new_relic/agent/agent_helpers/shutdown'
+require 'new_relic/agent/agent_helpers/transmit'
module NewRelic
module Agent
# The Agent is a singleton that is instantiated when the plugin is
# activated. It collects performance data from ruby applications
@@ -51,14 +54,17 @@
class Agent
def self.config
::NewRelic::Agent.config
end
- include NewRelic::Agent::StartWorkerThread
- include NewRelic::Agent::SpecialStartup
- include NewRelic::Agent::Startup
- include NewRelic::Agent::Shutdown
+ include NewRelic::Agent::AgentHelpers::Connect
+ include NewRelic::Agent::AgentHelpers::Harvest
+ include NewRelic::Agent::AgentHelpers::StartWorkerThread
+ include NewRelic::Agent::AgentHelpers::SpecialStartup
+ include NewRelic::Agent::AgentHelpers::Startup
+ include NewRelic::Agent::AgentHelpers::Shutdown
+ include NewRelic::Agent::AgentHelpers::Transmit
def initialize
init_basics
init_components
init_event_handlers
@@ -319,166 +325,10 @@
# A shorthand for NewRelic::Control.instance
def control
NewRelic::Control.instance
end
- # This module is an artifact of a refactoring of the connect
- # method - all of its methods are used in that context, so it
- # can be refactored at will. It should be fully tested
- module Connect
- # number of attempts we've made to contact the server
- attr_accessor :connect_attempts
-
- # Disconnect just sets the connect state to disconnected, preventing
- # further retries.
- def disconnect
- @connect_state = :disconnected
- true
- end
-
- def connected?
- @connect_state == :connected
- end
-
- def disconnected?
- @connect_state == :disconnected
- end
-
- # Don't connect if we're already connected, or if we tried to connect
- # and were rejected with prejudice because of a license issue, unless
- # we're forced to by force_reconnect.
- def should_connect?(force = false)
- force || (!connected? && !disconnected?)
- end
-
- # Per the spec at
- # /agents/agent-specs/Collector-Response-Handling.md, retry
- # connections after a specific backoff sequence to prevent
- # hammering the server.
- def connect_retry_period
- NewRelic::CONNECT_RETRY_PERIODS[connect_attempts] || NewRelic::MAX_RETRY_PERIOD
- end
-
- def note_connect_failure
- self.connect_attempts += 1
- end
-
- # When we have a problem connecting to the server, we need
- # to tell the user what happened, since this is not an error
- # we can handle gracefully.
- def log_error(error)
- ::NewRelic::Agent.logger.error("Error establishing connection with New Relic Service at #{control.server}:", error)
- end
-
- # When the server sends us an error with the license key, we
- # want to tell the user that something went wrong, and let
- # them know where to go to get a valid license key
- #
- # After this runs, it disconnects the agent so that it will
- # no longer try to connect to the server, saving the
- # application and the server load
- def handle_license_error(error)
- ::NewRelic::Agent.logger.error( \
- error.message, \
- "Visit NewRelic.com to obtain a valid license key, or to upgrade your account."
- )
- disconnect
- end
-
- def handle_unrecoverable_agent_error(error)
- ::NewRelic::Agent.logger.error(error.message)
- disconnect
- shutdown
- end
-
- # Checks whether we should send environment info, and if so,
- # returns the snapshot from the local environment.
- # Generating the EnvironmentReport has the potential to trigger
- # require calls in Rails environments, so this method should only
- # be called synchronously from on the main thread.
- def environment_for_connect
- @environment_report ||= Agent.config[:send_environment_info] ? Array(EnvironmentReport.new) : []
- end
-
- # Constructs and memoizes an event_harvest_config hash to be used in
- # the payload sent during connect (and reconnect)
- def event_harvest_config
- @event_harvest_config ||= Configuration::EventHarvestConfig.from_config(Agent.config)
- end
-
- # Builds the payload to send to the connect service,
- # connects, then configures the agent using the response from
- # the connect service
- def connect_to_server
- request_builder = ::NewRelic::Agent::Connect::RequestBuilder.new( \
- @service,
- Agent.config,
- event_harvest_config,
- environment_for_connect
- )
- connect_response = @service.connect(request_builder.connect_payload)
-
- response_handler = ::NewRelic::Agent::Connect::ResponseHandler.new(self, Agent.config)
- response_handler.configure_agent(connect_response)
-
- log_connection(connect_response) if connect_response
- connect_response
- end
-
- # Logs when we connect to the server, for debugging purposes
- # - makes sure we know if an agent has not connected
- def log_connection(config_data)
- ::NewRelic::Agent.logger.debug("Connected to NewRelic Service at #{@service.collector.name}")
- ::NewRelic::Agent.logger.debug("Agent Run = #{@service.agent_id}.")
- ::NewRelic::Agent.logger.debug("Connection data = #{config_data.inspect}")
- if config_data['messages'] && config_data['messages'].any?
- log_collector_messages(config_data['messages'])
- end
- end
-
- def log_collector_messages(messages)
- messages.each do |message|
- ::NewRelic::Agent.logger.send(message['level'].downcase, message['message'])
- end
- end
-
- # apdex_f is always 4 times the apdex_t
- def apdex_f
- (4 * Agent.config[:apdex_t]).to_f
- end
-
- class WaitOnConnectTimeout < StandardError
- end
-
- # Used for testing to let us know we've actually started to wait
- def waited_on_connect?
- @waited_on_connect
- end
-
- def signal_connected
- @wait_on_connect_mutex.synchronize do
- @wait_on_connect_condition.signal
- end
- end
-
- def wait_on_connect(timeout)
- return if connected?
-
- @waited_on_connect = true
- NewRelic::Agent.logger.debug("Waiting on connect to complete.")
-
- @wait_on_connect_mutex.synchronize do
- @wait_on_connect_condition.wait(@wait_on_connect_mutex, timeout)
- end
-
- unless connected?
- raise WaitOnConnectTimeout, "Agent was unable to connect in #{timeout} seconds."
- end
- end
- end
- include Connect
-
def container_for_endpoint(endpoint)
case endpoint
when :metric_data then @stats_engine
when :transaction_sample_data then @transaction_sampler
when :error_data then @error_collector.error_trace_aggregator
@@ -504,316 +354,14 @@
NewRelic::Agent.logger.error("Error while merging #{endpoint} data from child: ", e)
end
public :merge_data_for_endpoint
- # Establish a connection to New Relic servers.
- #
- # By default, if a connection has already been established, this method
- # will be a no-op.
- #
- # @param [Hash] options
- # @option options [Boolean] :keep_retrying (true)
- # If true, this method will block until a connection is successfully
- # established, continuing to retry upon failure. If false, this method
- # will return after either successfully connecting, or after failing
- # once.
- #
- # @option options [Boolean] :force_reconnect (false)
- # If true, this method will force establishment of a new connection
- # with New Relic, even if there is already an existing connection.
- # This is useful primarily when re-establishing a new connection after
- # forking off from a parent process.
- #
- def connect(options = {})
- opts = connect_options(options)
- return unless should_connect?(opts[:force_reconnect])
-
- ::NewRelic::Agent.logger.debug("Connecting Process to New Relic: #$0")
- connect_to_server
- @connected_pid = $$
- @connect_state = :connected
- signal_connected
- rescue NewRelic::Agent::ForceDisconnectException => e
- handle_force_disconnect(e)
- rescue NewRelic::Agent::LicenseException => e
- handle_license_error(e)
- rescue NewRelic::Agent::UnrecoverableAgentException => e
- handle_unrecoverable_agent_error(e)
- rescue StandardError, Timeout::Error, NewRelic::Agent::ServerConnectionException => e
- retry if retry_from_error?(e, opts)
- rescue Exception => e
- ::NewRelic::Agent.logger.error("Exception of unexpected type during Agent#connect():", e)
-
- raise
- end
-
- def connect_options(options)
- {
- keep_retrying: Agent.config[:keep_retrying],
- force_reconnect: Agent.config[:force_reconnect]
- }.merge(options)
- end
-
- def retry_from_error?(e, opts)
- # Allow a killed (aborting) thread to continue exiting during shutdown.
- # See: https://github.com/newrelic/newrelic-ruby-agent/issues/340
- raise if Thread.current.status == 'aborting'
-
- log_error(e)
- return false unless opts[:keep_retrying]
-
- note_connect_failure
- ::NewRelic::Agent.logger.info("Will re-attempt in #{connect_retry_period} seconds")
- sleep(connect_retry_period)
- true
- end
-
# Delegates to the control class to determine the root
# directory of this project
def determine_home_directory
control.root
end
-
- # Harvests data from the given container, sends it to the named endpoint
- # on the service, and automatically merges back in upon a recoverable
- # failure.
- #
- # The given container should respond to:
- #
- # #harvest!
- # returns a payload that contains enumerable collection of data items and
- # optional metadata to be sent to the collector.
- #
- # #reset!
- # drop any stored data and reset to a clean state.
- #
- # #merge!(payload)
- # merge the given payload back into the internal buffer of the
- # container, so that it may be harvested again later.
- #
- def harvest_and_send_from_container(container, endpoint)
- payload = harvest_from_container(container, endpoint)
- sample_count = harvest_size(container, payload)
- if sample_count > 0
- NewRelic::Agent.logger.debug("Sending #{sample_count} items to #{endpoint}")
- send_data_to_endpoint(endpoint, payload, container)
- end
- end
-
- def harvest_size(container, items)
- if container.respond_to?(:has_metadata?) && container.has_metadata? && !items.empty?
- items.last.size
- else
- items.size
- end
- end
-
- def harvest_from_container(container, endpoint)
- items = []
- begin
- items = container.harvest!
- rescue => e
- NewRelic::Agent.logger.error("Failed to harvest #{endpoint} data, resetting. Error: ", e)
- container.reset!
- end
- items
- end
-
- def send_data_to_endpoint(endpoint, payload, container)
- begin
- @service.send(endpoint, payload)
- rescue ForceRestartException, ForceDisconnectException
- raise
- rescue SerializationError => e
- NewRelic::Agent.logger.warn("Failed to serialize data for #{endpoint}, discarding. Error: ", e)
- rescue UnrecoverableServerException => e
- NewRelic::Agent.logger.warn("#{endpoint} data was rejected by remote service, discarding. Error: ", e)
- rescue ServerConnectionException => e
- log_remote_unavailable(endpoint, e)
- container.merge!(payload)
- rescue => e
- NewRelic::Agent.logger.info("Unable to send #{endpoint} data, will try again later. Error: ", e)
- container.merge!(payload)
- end
- end
-
- def harvest_and_send_timeslice_data
- TransactionTimeAggregator.harvest!
- harvest_and_send_from_container(@stats_engine, :metric_data)
- end
-
- def harvest_and_send_slowest_sql
- harvest_and_send_from_container(@sql_sampler, :sql_trace_data)
- end
-
- # This handles getting the transaction traces and then sending
- # them across the wire. This includes gathering SQL
- # explanations, stripping out stack traces, and normalizing
- # SQL. note that we explain only the sql statements whose
- # nodes' execution times exceed our threshold (to avoid
- # unnecessary overhead of running explains on fast queries.)
- def harvest_and_send_transaction_traces
- harvest_and_send_from_container(@transaction_sampler, :transaction_sample_data)
- end
-
- def harvest_and_send_for_agent_commands
- harvest_and_send_from_container(@agent_command_router, :profile_data)
- end
-
- def harvest_and_send_errors
- harvest_and_send_from_container(@error_collector.error_trace_aggregator, :error_data)
- end
-
- def harvest_and_send_analytic_event_data
- harvest_and_send_from_container(transaction_event_aggregator, :analytic_event_data)
- harvest_and_send_from_container(synthetics_event_aggregator, :analytic_event_data)
- end
-
- def harvest_and_send_custom_event_data
- harvest_and_send_from_container(@custom_event_aggregator, :custom_event_data)
- end
-
- def harvest_and_send_error_event_data
- harvest_and_send_from_container(@error_collector.error_event_aggregator, :error_event_data)
- end
-
- def harvest_and_send_span_event_data
- harvest_and_send_from_container(span_event_aggregator, :span_event_data)
- end
-
- def harvest_and_send_log_event_data
- harvest_and_send_from_container(@log_event_aggregator, :log_event_data)
- end
-
- def check_for_and_handle_agent_commands
- begin
- @agent_command_router.check_for_and_handle_agent_commands
- rescue ForceRestartException, ForceDisconnectException
- raise
- rescue UnrecoverableServerException => e
- NewRelic::Agent.logger.warn("get_agent_commands message was rejected by remote service, discarding. Error: ", e)
- rescue ServerConnectionException => e
- log_remote_unavailable(:get_agent_commands, e)
- rescue => e
- NewRelic::Agent.logger.info("Error during check_for_and_handle_agent_commands, will retry later: ", e)
- end
- end
-
- def log_remote_unavailable(endpoint, e)
- NewRelic::Agent.logger.debug("Unable to send #{endpoint} data, will try again later. Error: ", e)
- NewRelic::Agent.record_metric("Supportability/remote_unavailable", 0.0)
- NewRelic::Agent.record_metric("Supportability/remote_unavailable/#{endpoint.to_s}", 0.0)
- end
-
- TRANSACTION_EVENT = "TransactionEvent".freeze
- def transmit_analytic_event_data
- transmit_single_data_type(:harvest_and_send_analytic_event_data, TRANSACTION_EVENT)
- end
-
- CUSTOM_EVENT = "CustomEvent".freeze
- def transmit_custom_event_data
- transmit_single_data_type(:harvest_and_send_custom_event_data, CUSTOM_EVENT)
- end
-
- ERROR_EVENT = "ErrorEvent".freeze
- def transmit_error_event_data
- transmit_single_data_type(:harvest_and_send_error_event_data, ERROR_EVENT)
- end
-
- SPAN_EVENT = "SpanEvent".freeze
- def transmit_span_event_data
- transmit_single_data_type(:harvest_and_send_span_event_data, SPAN_EVENT)
- end
-
- LOG_EVENT = "LogEvent".freeze
- def transmit_log_event_data
- transmit_single_data_type(:harvest_and_send_log_event_data, LOG_EVENT)
- end
-
- def transmit_single_data_type(harvest_method, supportability_name)
- now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
-
- msg = "Sending #{supportability_name} data to New Relic Service"
- ::NewRelic::Agent.logger.debug(msg)
-
- @service.session do # use http keep-alive
- self.send(harvest_method)
- end
- ensure
- duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - now
- NewRelic::Agent.record_metric("Supportability/#{supportability_name}Harvest", duration)
- end
-
- def transmit_data
- now = Process.clock_gettime(Process::CLOCK_MONOTONIC)
- ::NewRelic::Agent.logger.debug("Sending data to New Relic Service")
-
- @events.notify(:before_harvest)
- @service.session do # use http keep-alive
- harvest_and_send_data_types
-
- check_for_and_handle_agent_commands
- harvest_and_send_for_agent_commands
- end
- ensure
- NewRelic::Agent::Database.close_connections
- duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - now
- NewRelic::Agent.record_metric('Supportability/Harvest', duration)
- end
-
- # This method contacts the server to send remaining data and
- # let the server know that the agent is shutting down - this
- # allows us to do things like accurately set the end of the
- # lifetime of the process
- #
- # If this process comes from a parent process, it will not
- # disconnect, so that the parent process can continue to send data
- def graceful_disconnect
- if connected?
- begin
- @service.request_timeout = 10
-
- @events.notify(:before_shutdown)
- transmit_data_types
- shutdown_service
-
- ::NewRelic::Agent.logger.debug("Graceful disconnect complete")
- rescue Timeout::Error, StandardError => e
- ::NewRelic::Agent.logger.debug("Error when disconnecting #{e.class.name}: #{e.message}")
- end
- else
- ::NewRelic::Agent.logger.debug("Bypassing graceful disconnect - agent not connected")
- end
- end
- end
-
- def shutdown_service
- if @connected_pid == $$ && !@service.kind_of?(NewRelic::Agent::NewRelicService)
- ::NewRelic::Agent.logger.debug("Sending New Relic service agent run shutdown message")
- @service.shutdown
- else
- ::NewRelic::Agent.logger.debug("This agent connected from parent process #{@connected_pid}--not sending shutdown")
- end
- end
-
- def transmit_data_types
- transmit_data
- transmit_analytic_event_data
- transmit_custom_event_data
- transmit_error_event_data
- transmit_span_event_data
- transmit_log_event_data
- end
-
- def harvest_and_send_data_types
- harvest_and_send_errors
- harvest_and_send_error_event_data
- harvest_and_send_transaction_traces
- harvest_and_send_slowest_sql
- harvest_and_send_timeslice_data
- harvest_and_send_span_event_data
- harvest_and_send_log_event_data
end
extend ClassMethods
include InstanceMethods
end