lib/new_relic/agent/agent.rb in newrelic_rpm-8.13.1 vs lib/new_relic/agent/agent.rb in newrelic_rpm-8.14.0

- old
+ new

@@ -35,14 +35,17 @@ require 'new_relic/agent/attribute_filter' require 'new_relic/agent/adaptive_sampler' require 'new_relic/agent/connect/request_builder' require 'new_relic/agent/connect/response_handler' -require 'new_relic/agent/agent/start_worker_thread' -require 'new_relic/agent/agent/startup' -require 'new_relic/agent/agent/special_startup' -require 'new_relic/agent/agent/shutdown' +require 'new_relic/agent/agent_helpers/connect' +require 'new_relic/agent/agent_helpers/harvest' +require 'new_relic/agent/agent_helpers/start_worker_thread' +require 'new_relic/agent/agent_helpers/startup' +require 'new_relic/agent/agent_helpers/special_startup' +require 'new_relic/agent/agent_helpers/shutdown' +require 'new_relic/agent/agent_helpers/transmit' module NewRelic module Agent # The Agent is a singleton that is instantiated when the plugin is # activated. It collects performance data from ruby applications @@ -51,14 +54,17 @@ class Agent def self.config ::NewRelic::Agent.config end - include NewRelic::Agent::StartWorkerThread - include NewRelic::Agent::SpecialStartup - include NewRelic::Agent::Startup - include NewRelic::Agent::Shutdown + include NewRelic::Agent::AgentHelpers::Connect + include NewRelic::Agent::AgentHelpers::Harvest + include NewRelic::Agent::AgentHelpers::StartWorkerThread + include NewRelic::Agent::AgentHelpers::SpecialStartup + include NewRelic::Agent::AgentHelpers::Startup + include NewRelic::Agent::AgentHelpers::Shutdown + include NewRelic::Agent::AgentHelpers::Transmit def initialize init_basics init_components init_event_handlers @@ -319,166 +325,10 @@ # A shorthand for NewRelic::Control.instance def control NewRelic::Control.instance end - # This module is an artifact of a refactoring of the connect - # method - all of its methods are used in that context, so it - # can be refactored at will. It should be fully tested - module Connect - # number of attempts we've made to contact the server - attr_accessor :connect_attempts - - # Disconnect just sets the connect state to disconnected, preventing - # further retries. - def disconnect - @connect_state = :disconnected - true - end - - def connected? - @connect_state == :connected - end - - def disconnected? - @connect_state == :disconnected - end - - # Don't connect if we're already connected, or if we tried to connect - # and were rejected with prejudice because of a license issue, unless - # we're forced to by force_reconnect. - def should_connect?(force = false) - force || (!connected? && !disconnected?) - end - - # Per the spec at - # /agents/agent-specs/Collector-Response-Handling.md, retry - # connections after a specific backoff sequence to prevent - # hammering the server. - def connect_retry_period - NewRelic::CONNECT_RETRY_PERIODS[connect_attempts] || NewRelic::MAX_RETRY_PERIOD - end - - def note_connect_failure - self.connect_attempts += 1 - end - - # When we have a problem connecting to the server, we need - # to tell the user what happened, since this is not an error - # we can handle gracefully. - def log_error(error) - ::NewRelic::Agent.logger.error("Error establishing connection with New Relic Service at #{control.server}:", error) - end - - # When the server sends us an error with the license key, we - # want to tell the user that something went wrong, and let - # them know where to go to get a valid license key - # - # After this runs, it disconnects the agent so that it will - # no longer try to connect to the server, saving the - # application and the server load - def handle_license_error(error) - ::NewRelic::Agent.logger.error( \ - error.message, \ - "Visit NewRelic.com to obtain a valid license key, or to upgrade your account." - ) - disconnect - end - - def handle_unrecoverable_agent_error(error) - ::NewRelic::Agent.logger.error(error.message) - disconnect - shutdown - end - - # Checks whether we should send environment info, and if so, - # returns the snapshot from the local environment. - # Generating the EnvironmentReport has the potential to trigger - # require calls in Rails environments, so this method should only - # be called synchronously from on the main thread. - def environment_for_connect - @environment_report ||= Agent.config[:send_environment_info] ? Array(EnvironmentReport.new) : [] - end - - # Constructs and memoizes an event_harvest_config hash to be used in - # the payload sent during connect (and reconnect) - def event_harvest_config - @event_harvest_config ||= Configuration::EventHarvestConfig.from_config(Agent.config) - end - - # Builds the payload to send to the connect service, - # connects, then configures the agent using the response from - # the connect service - def connect_to_server - request_builder = ::NewRelic::Agent::Connect::RequestBuilder.new( \ - @service, - Agent.config, - event_harvest_config, - environment_for_connect - ) - connect_response = @service.connect(request_builder.connect_payload) - - response_handler = ::NewRelic::Agent::Connect::ResponseHandler.new(self, Agent.config) - response_handler.configure_agent(connect_response) - - log_connection(connect_response) if connect_response - connect_response - end - - # Logs when we connect to the server, for debugging purposes - # - makes sure we know if an agent has not connected - def log_connection(config_data) - ::NewRelic::Agent.logger.debug("Connected to NewRelic Service at #{@service.collector.name}") - ::NewRelic::Agent.logger.debug("Agent Run = #{@service.agent_id}.") - ::NewRelic::Agent.logger.debug("Connection data = #{config_data.inspect}") - if config_data['messages'] && config_data['messages'].any? - log_collector_messages(config_data['messages']) - end - end - - def log_collector_messages(messages) - messages.each do |message| - ::NewRelic::Agent.logger.send(message['level'].downcase, message['message']) - end - end - - # apdex_f is always 4 times the apdex_t - def apdex_f - (4 * Agent.config[:apdex_t]).to_f - end - - class WaitOnConnectTimeout < StandardError - end - - # Used for testing to let us know we've actually started to wait - def waited_on_connect? - @waited_on_connect - end - - def signal_connected - @wait_on_connect_mutex.synchronize do - @wait_on_connect_condition.signal - end - end - - def wait_on_connect(timeout) - return if connected? - - @waited_on_connect = true - NewRelic::Agent.logger.debug("Waiting on connect to complete.") - - @wait_on_connect_mutex.synchronize do - @wait_on_connect_condition.wait(@wait_on_connect_mutex, timeout) - end - - unless connected? - raise WaitOnConnectTimeout, "Agent was unable to connect in #{timeout} seconds." - end - end - end - include Connect - def container_for_endpoint(endpoint) case endpoint when :metric_data then @stats_engine when :transaction_sample_data then @transaction_sampler when :error_data then @error_collector.error_trace_aggregator @@ -504,316 +354,14 @@ NewRelic::Agent.logger.error("Error while merging #{endpoint} data from child: ", e) end public :merge_data_for_endpoint - # Establish a connection to New Relic servers. - # - # By default, if a connection has already been established, this method - # will be a no-op. - # - # @param [Hash] options - # @option options [Boolean] :keep_retrying (true) - # If true, this method will block until a connection is successfully - # established, continuing to retry upon failure. If false, this method - # will return after either successfully connecting, or after failing - # once. - # - # @option options [Boolean] :force_reconnect (false) - # If true, this method will force establishment of a new connection - # with New Relic, even if there is already an existing connection. - # This is useful primarily when re-establishing a new connection after - # forking off from a parent process. - # - def connect(options = {}) - opts = connect_options(options) - return unless should_connect?(opts[:force_reconnect]) - - ::NewRelic::Agent.logger.debug("Connecting Process to New Relic: #$0") - connect_to_server - @connected_pid = $$ - @connect_state = :connected - signal_connected - rescue NewRelic::Agent::ForceDisconnectException => e - handle_force_disconnect(e) - rescue NewRelic::Agent::LicenseException => e - handle_license_error(e) - rescue NewRelic::Agent::UnrecoverableAgentException => e - handle_unrecoverable_agent_error(e) - rescue StandardError, Timeout::Error, NewRelic::Agent::ServerConnectionException => e - retry if retry_from_error?(e, opts) - rescue Exception => e - ::NewRelic::Agent.logger.error("Exception of unexpected type during Agent#connect():", e) - - raise - end - - def connect_options(options) - { - keep_retrying: Agent.config[:keep_retrying], - force_reconnect: Agent.config[:force_reconnect] - }.merge(options) - end - - def retry_from_error?(e, opts) - # Allow a killed (aborting) thread to continue exiting during shutdown. - # See: https://github.com/newrelic/newrelic-ruby-agent/issues/340 - raise if Thread.current.status == 'aborting' - - log_error(e) - return false unless opts[:keep_retrying] - - note_connect_failure - ::NewRelic::Agent.logger.info("Will re-attempt in #{connect_retry_period} seconds") - sleep(connect_retry_period) - true - end - # Delegates to the control class to determine the root # directory of this project def determine_home_directory control.root end - - # Harvests data from the given container, sends it to the named endpoint - # on the service, and automatically merges back in upon a recoverable - # failure. - # - # The given container should respond to: - # - # #harvest! - # returns a payload that contains enumerable collection of data items and - # optional metadata to be sent to the collector. - # - # #reset! - # drop any stored data and reset to a clean state. - # - # #merge!(payload) - # merge the given payload back into the internal buffer of the - # container, so that it may be harvested again later. - # - def harvest_and_send_from_container(container, endpoint) - payload = harvest_from_container(container, endpoint) - sample_count = harvest_size(container, payload) - if sample_count > 0 - NewRelic::Agent.logger.debug("Sending #{sample_count} items to #{endpoint}") - send_data_to_endpoint(endpoint, payload, container) - end - end - - def harvest_size(container, items) - if container.respond_to?(:has_metadata?) && container.has_metadata? && !items.empty? - items.last.size - else - items.size - end - end - - def harvest_from_container(container, endpoint) - items = [] - begin - items = container.harvest! - rescue => e - NewRelic::Agent.logger.error("Failed to harvest #{endpoint} data, resetting. Error: ", e) - container.reset! - end - items - end - - def send_data_to_endpoint(endpoint, payload, container) - begin - @service.send(endpoint, payload) - rescue ForceRestartException, ForceDisconnectException - raise - rescue SerializationError => e - NewRelic::Agent.logger.warn("Failed to serialize data for #{endpoint}, discarding. Error: ", e) - rescue UnrecoverableServerException => e - NewRelic::Agent.logger.warn("#{endpoint} data was rejected by remote service, discarding. Error: ", e) - rescue ServerConnectionException => e - log_remote_unavailable(endpoint, e) - container.merge!(payload) - rescue => e - NewRelic::Agent.logger.info("Unable to send #{endpoint} data, will try again later. Error: ", e) - container.merge!(payload) - end - end - - def harvest_and_send_timeslice_data - TransactionTimeAggregator.harvest! - harvest_and_send_from_container(@stats_engine, :metric_data) - end - - def harvest_and_send_slowest_sql - harvest_and_send_from_container(@sql_sampler, :sql_trace_data) - end - - # This handles getting the transaction traces and then sending - # them across the wire. This includes gathering SQL - # explanations, stripping out stack traces, and normalizing - # SQL. note that we explain only the sql statements whose - # nodes' execution times exceed our threshold (to avoid - # unnecessary overhead of running explains on fast queries.) - def harvest_and_send_transaction_traces - harvest_and_send_from_container(@transaction_sampler, :transaction_sample_data) - end - - def harvest_and_send_for_agent_commands - harvest_and_send_from_container(@agent_command_router, :profile_data) - end - - def harvest_and_send_errors - harvest_and_send_from_container(@error_collector.error_trace_aggregator, :error_data) - end - - def harvest_and_send_analytic_event_data - harvest_and_send_from_container(transaction_event_aggregator, :analytic_event_data) - harvest_and_send_from_container(synthetics_event_aggregator, :analytic_event_data) - end - - def harvest_and_send_custom_event_data - harvest_and_send_from_container(@custom_event_aggregator, :custom_event_data) - end - - def harvest_and_send_error_event_data - harvest_and_send_from_container(@error_collector.error_event_aggregator, :error_event_data) - end - - def harvest_and_send_span_event_data - harvest_and_send_from_container(span_event_aggregator, :span_event_data) - end - - def harvest_and_send_log_event_data - harvest_and_send_from_container(@log_event_aggregator, :log_event_data) - end - - def check_for_and_handle_agent_commands - begin - @agent_command_router.check_for_and_handle_agent_commands - rescue ForceRestartException, ForceDisconnectException - raise - rescue UnrecoverableServerException => e - NewRelic::Agent.logger.warn("get_agent_commands message was rejected by remote service, discarding. Error: ", e) - rescue ServerConnectionException => e - log_remote_unavailable(:get_agent_commands, e) - rescue => e - NewRelic::Agent.logger.info("Error during check_for_and_handle_agent_commands, will retry later: ", e) - end - end - - def log_remote_unavailable(endpoint, e) - NewRelic::Agent.logger.debug("Unable to send #{endpoint} data, will try again later. Error: ", e) - NewRelic::Agent.record_metric("Supportability/remote_unavailable", 0.0) - NewRelic::Agent.record_metric("Supportability/remote_unavailable/#{endpoint.to_s}", 0.0) - end - - TRANSACTION_EVENT = "TransactionEvent".freeze - def transmit_analytic_event_data - transmit_single_data_type(:harvest_and_send_analytic_event_data, TRANSACTION_EVENT) - end - - CUSTOM_EVENT = "CustomEvent".freeze - def transmit_custom_event_data - transmit_single_data_type(:harvest_and_send_custom_event_data, CUSTOM_EVENT) - end - - ERROR_EVENT = "ErrorEvent".freeze - def transmit_error_event_data - transmit_single_data_type(:harvest_and_send_error_event_data, ERROR_EVENT) - end - - SPAN_EVENT = "SpanEvent".freeze - def transmit_span_event_data - transmit_single_data_type(:harvest_and_send_span_event_data, SPAN_EVENT) - end - - LOG_EVENT = "LogEvent".freeze - def transmit_log_event_data - transmit_single_data_type(:harvest_and_send_log_event_data, LOG_EVENT) - end - - def transmit_single_data_type(harvest_method, supportability_name) - now = Process.clock_gettime(Process::CLOCK_MONOTONIC) - - msg = "Sending #{supportability_name} data to New Relic Service" - ::NewRelic::Agent.logger.debug(msg) - - @service.session do # use http keep-alive - self.send(harvest_method) - end - ensure - duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - now - NewRelic::Agent.record_metric("Supportability/#{supportability_name}Harvest", duration) - end - - def transmit_data - now = Process.clock_gettime(Process::CLOCK_MONOTONIC) - ::NewRelic::Agent.logger.debug("Sending data to New Relic Service") - - @events.notify(:before_harvest) - @service.session do # use http keep-alive - harvest_and_send_data_types - - check_for_and_handle_agent_commands - harvest_and_send_for_agent_commands - end - ensure - NewRelic::Agent::Database.close_connections - duration = Process.clock_gettime(Process::CLOCK_MONOTONIC) - now - NewRelic::Agent.record_metric('Supportability/Harvest', duration) - end - - # This method contacts the server to send remaining data and - # let the server know that the agent is shutting down - this - # allows us to do things like accurately set the end of the - # lifetime of the process - # - # If this process comes from a parent process, it will not - # disconnect, so that the parent process can continue to send data - def graceful_disconnect - if connected? - begin - @service.request_timeout = 10 - - @events.notify(:before_shutdown) - transmit_data_types - shutdown_service - - ::NewRelic::Agent.logger.debug("Graceful disconnect complete") - rescue Timeout::Error, StandardError => e - ::NewRelic::Agent.logger.debug("Error when disconnecting #{e.class.name}: #{e.message}") - end - else - ::NewRelic::Agent.logger.debug("Bypassing graceful disconnect - agent not connected") - end - end - end - - def shutdown_service - if @connected_pid == $$ && !@service.kind_of?(NewRelic::Agent::NewRelicService) - ::NewRelic::Agent.logger.debug("Sending New Relic service agent run shutdown message") - @service.shutdown - else - ::NewRelic::Agent.logger.debug("This agent connected from parent process #{@connected_pid}--not sending shutdown") - end - end - - def transmit_data_types - transmit_data - transmit_analytic_event_data - transmit_custom_event_data - transmit_error_event_data - transmit_span_event_data - transmit_log_event_data - end - - def harvest_and_send_data_types - harvest_and_send_errors - harvest_and_send_error_event_data - harvest_and_send_transaction_traces - harvest_and_send_slowest_sql - harvest_and_send_timeslice_data - harvest_and_send_span_event_data - harvest_and_send_log_event_data end extend ClassMethods include InstanceMethods end