lib/new_relic/agent/agent.rb in newrelic_rpm-8.8.0 vs lib/new_relic/agent/agent.rb in newrelic_rpm-8.9.0

- old
+ new

@@ -35,10 +35,15 @@ require 'new_relic/agent/attribute_filter' require 'new_relic/agent/adaptive_sampler' require 'new_relic/agent/connect/request_builder' require 'new_relic/agent/connect/response_handler' +require 'new_relic/agent/agent/start_worker_thread' +require 'new_relic/agent/agent/startup' +require 'new_relic/agent/agent/special_startup' +require 'new_relic/agent/agent/shutdown' + module NewRelic module Agent # The Agent is a singleton that is instantiated when the plugin is # activated. It collects performance data from ruby applications # in realtime as the application runs, and periodically sends that @@ -46,10 +51,15 @@ class Agent def self.config ::NewRelic::Agent.config end + include NewRelic::Agent::StartWorkerThread + include NewRelic::Agent::SpecialStartup + include NewRelic::Agent::Startup + include NewRelic::Agent::Shutdown + def initialize @started = false @event_loop = nil @worker_thread = nil @@ -220,71 +230,24 @@ @service.shutdown disconnect end end - # True if we have initialized and completed 'start' - def started? - @started - end - - # Attempt a graceful shutdown of the agent, flushing any remaining - # data. - def shutdown - return unless started? - ::NewRelic::Agent.logger.info "Starting Agent shutdown" - - stop_event_loop - trap_signals_for_litespeed - untraced_graceful_disconnect - revert_to_default_configuration - - @started = nil - Control.reset - end - def revert_to_default_configuration Agent.config.remove_config_type(:manual) Agent.config.remove_config_type(:server) end - # If the @worker_thread encounters an error during the attempt to connect to the collector - # then the connect attempts enter an exponential backoff retry loop. To avoid potential - # race conditions with shutting down while also attempting to reconnect, we join the - # @worker_thread with a timeout threshold. This allows potentially connecting and flushing - # pending data to the server, but without waiting indefinitely for a reconnect to succeed. - # The use-case where this typically arises is in cronjob scheduled rake tasks where there's - # also some network stability/latency issues happening. - def stop_event_loop - @event_loop.stop if @event_loop - # Wait the end of the event loop thread. - if @worker_thread - unless @worker_thread.join(3) - ::NewRelic::Agent.logger.debug "Event loop thread did not stop within 3 seconds" - end - end - end - def trap_signals_for_litespeed # if litespeed, then ignore all future SIGUSR1 - it's # litespeed trying to shut us down if Agent.config[:dispatcher] == :litespeed Signal.trap("SIGUSR1", "IGNORE") Signal.trap("SIGTERM", "IGNORE") end end - def untraced_graceful_disconnect - begin - NewRelic::Agent.disable_all_tracing do - graceful_disconnect - end - rescue => e - ::NewRelic::Agent.logger.error e - end - end - # Sets a thread local variable as to whether we should or # should not record sql in the current thread. Returns the # previous value, if there is one def set_record_sql(should_record) # THREAD_LOCAL_ACCESS state = Tracer.state @@ -305,259 +268,10 @@ # to what it was before we pushed the current flag. def pop_trace_execution_flag # THREAD_LOCAL_ACCESS Tracer.state.pop_traced end - # Herein lies the corpse of the former 'start' method. May - # its unmatched flog score rest in pieces. - module Start - # Check whether we have already started, which is an error condition - def already_started? - if started? - ::NewRelic::Agent.logger.error("Agent Started Already!") - true - end - end - - # The agent is disabled when it is not force enabled by the - # 'agent_enabled' option (e.g. in a manual start), or - # enabled normally through the configuration file - def disabled? - !Agent.config[:agent_enabled] - end - - # Log startup information that we almost always want to know - def log_startup - log_environment - log_dispatcher - log_app_name - end - - # Log the environment the app thinks it's running in. - # Useful in debugging, as this is the key for config YAML lookups. - def log_environment - ::NewRelic::Agent.logger.info "Environment: #{NewRelic::Control.instance.env}" - end - - # Logs the dispatcher to the log file to assist with - # debugging. When no debugger is present, logs this fact to - # assist with proper dispatcher detection - def log_dispatcher - dispatcher_name = Agent.config[:dispatcher].to_s - - if dispatcher_name.empty? - ::NewRelic::Agent.logger.info 'No known dispatcher detected.' - else - ::NewRelic::Agent.logger.info "Dispatcher: #{dispatcher_name}" - end - end - - def log_app_name - ::NewRelic::Agent.logger.info "Application: #{Agent.config[:app_name].join(", ")}" - end - - def log_ignore_url_regexes - regexes = NewRelic::Agent.config[:'rules.ignore_url_regexes'] - - unless regexes.empty? - ::NewRelic::Agent.logger.info "Ignoring URLs that match the following regexes: #{regexes.map(&:inspect).join(", ")}." - end - end - - # Logs the configured application names - def app_name_configured? - names = Agent.config[:app_name] - return names.respond_to?(:any?) && names.any? - end - - # Connecting in the foreground blocks further startup of the - # agent until we have a connection - useful in cases where - # you're trying to log a very-short-running process and want - # to get statistics from before a server connection - # (typically 20 seconds) exists - def connect_in_foreground - NewRelic::Agent.disable_all_tracing { connect(:keep_retrying => false) } - end - - # This matters when the following three criteria are met: - # - # 1. A Sinatra 'classic' application is being run - # 2. The app is being run by executing the main file directly, rather - # than via a config.ru file. - # 3. newrelic_rpm is required *after* sinatra - # - # In this case, the entire application runs from an at_exit handler in - # Sinatra, and if we were to install ours, it would be executed before - # the one in Sinatra, meaning that we'd shutdown the agent too early - # and never collect any data. - def sinatra_classic_app? - ( - defined?(Sinatra::Application) && - Sinatra::Application.respond_to?(:run) && - Sinatra::Application.run? - ) - end - - def should_install_exit_handler? - return false unless Agent.config[:send_data_on_exit] - !sinatra_classic_app? || Agent.config[:force_install_exit_handler] - end - - def install_exit_handler - return unless should_install_exit_handler? - NewRelic::Agent.logger.debug("Installing at_exit handler") - at_exit { shutdown } - end - - # Classy logging of the agent version and the current pid, - # so we can disambiguate processes in the log file and make - # sure they're running a reasonable version - def log_version_and_pid - ::NewRelic::Agent.logger.debug "New Relic Ruby Agent #{NewRelic::VERSION::STRING} Initialized: pid = #{$$}" - end - - # Warn the user if they have configured their agent not to - # send data, that way we can see this clearly in the log file - def monitoring? - if Agent.config[:monitor_mode] - true - else - ::NewRelic::Agent.logger.warn('Agent configured not to send data in this environment.') - false - end - end - - # Tell the user when the license key is missing so they can - # fix it by adding it to the file - def has_license_key? - if Agent.config[:license_key] && Agent.config[:license_key].length > 0 - true - else - ::NewRelic::Agent.logger.warn("No license key found. " + - "This often means your newrelic.yml file was not found, or it lacks a section for the running environment, '#{NewRelic::Control.instance.env}'. You may also want to try linting your newrelic.yml to ensure it is valid YML.") - false - end - end - - # A correct license key exists and is of the proper length - def has_correct_license_key? - has_license_key? && correct_license_length - end - - # A license key is an arbitrary 40 character string, - # usually looks something like a SHA1 hash - def correct_license_length - key = Agent.config[:license_key] - - if key.length == 40 - true - else - ::NewRelic::Agent.logger.error("Invalid license key: #{key}") - false - end - end - - # If we're using a dispatcher that forks before serving - # requests, we need to wait until the children are forked - # before connecting, otherwise the parent process sends useless data - def using_forking_dispatcher? - # TODO: MAJOR VERSION - remove :rainbows - if [:puma, :passenger, :rainbows, :unicorn].include? Agent.config[:dispatcher] - ::NewRelic::Agent.logger.info "Deferring startup of agent reporting thread because #{Agent.config[:dispatcher]} may fork." - true - else - false - end - end - - # Return true if we're using resque and it hasn't had a chance to (potentially) - # daemonize itself. This avoids hanging when there's a Thread started - # before Resque calls Process.daemon (Jira RUBY-857) - def defer_for_resque? - NewRelic::Agent.config[:dispatcher] == :resque && - NewRelic::Agent::Instrumentation::Resque::Helper.resque_fork_per_job? && - !PipeChannelManager.listener.started? - end - - def in_resque_child_process? - defined?(@service) && @service.is_a?(PipeService) - end - - # Sanity-check the agent configuration and start the agent, - # setting up the worker thread and the exit handler to shut - # down the agent - def check_config_and_start_agent - return unless monitoring? && has_correct_license_key? - return if using_forking_dispatcher? - setup_and_start_agent - end - - # This is the shared method between the main agent startup and the - # after_fork call restarting the thread in deferred dispatchers. - # - # Treatment of @started and env report is important to get right. - def setup_and_start_agent(options = {}) - @started = true - @harvester.mark_started - - unless in_resque_child_process? - install_exit_handler - environment_for_connect - @harvest_samplers.load_samplers unless Agent.config[:disable_samplers] - end - - connect_in_foreground if Agent.config[:sync_startup] - start_worker_thread(options) - end - end - - include Start - - def defer_for_delayed_job? - NewRelic::Agent.config[:dispatcher] == :delayed_job && - !NewRelic::DelayedJobInjection.worker_name - end - - # Check to see if the agent should start, returning +true+ if it should. - def agent_should_start? - return false if already_started? || disabled? - - if defer_for_delayed_job? - ::NewRelic::Agent.logger.debug "Deferring startup for DelayedJob" - return false - end - - if defer_for_resque? - ::NewRelic::Agent.logger.debug "Deferring startup for Resque in case it daemonizes" - return false - end - - unless app_name_configured? - NewRelic::Agent.logger.error "No application name configured.", - "The Agent cannot start without at least one. Please check your ", - "newrelic.yml and ensure that it is valid and has at least one ", - "value set for app_name in the #{NewRelic::Control.instance.env} ", - "environment." - return false - end - - return true - end - - # Logs a bunch of data and starts the agent, if needed - def start - return unless agent_should_start? - - log_startup - check_config_and_start_agent - log_version_and_pid - - events.subscribe(:initial_configuration_complete) do - log_ignore_url_regexes - end - end - # Clear out the metric data, errors, and transaction traces, etc. def drop_buffered_data @stats_engine.reset! @error_collector.drop_buffered_data @transaction_sampler.reset! @@ -589,152 +303,9 @@ transmit_log_event_data end end private - - # All of this module used to be contained in the - # start_worker_thread method - this is an artifact of - # refactoring and can be moved, renamed, etc at will - module StartWorkerThread - def create_event_loop - EventLoop.new - end - - LOG_ONCE_KEYS_RESET_PERIOD = 60.0 - - # Certain event types may sometimes need to be on the same interval as metrics, - # so we will check config assigned in EventHarvestConfig to determine the interval - # on which to report them - def interval_for event_type - interval = Agent.config[:"event_report_period.#{event_type}"] - :"#{interval}_second_harvest" - end - - TRANSACTION_EVENT_DATA = "transaction_event_data".freeze - CUSTOM_EVENT_DATA = "custom_event_data".freeze - ERROR_EVENT_DATA = "error_event_data".freeze - SPAN_EVENT_DATA = "span_event_data".freeze - LOG_EVENT_DATA = "log_event_data".freeze - - def create_and_run_event_loop - data_harvest = :"#{Agent.config[:data_report_period]}_second_harvest" - event_harvest = :"#{Agent.config[:event_report_period]}_second_harvest" - - @event_loop = create_event_loop - @event_loop.on(data_harvest) do - transmit_data - end - - @event_loop.on(interval_for TRANSACTION_EVENT_DATA) do - transmit_analytic_event_data - end - @event_loop.on(interval_for CUSTOM_EVENT_DATA) do - transmit_custom_event_data - end - @event_loop.on(interval_for ERROR_EVENT_DATA) do - transmit_error_event_data - end - @event_loop.on(interval_for SPAN_EVENT_DATA) do - transmit_span_event_data - end - @event_loop.on(interval_for LOG_EVENT_DATA) do - transmit_log_event_data - end - - @event_loop.on(:reset_log_once_keys) do - ::NewRelic::Agent.logger.clear_already_logged - end - @event_loop.fire_every(Agent.config[:data_report_period], data_harvest) - @event_loop.fire_every(Agent.config[:event_report_period], event_harvest) - @event_loop.fire_every(LOG_ONCE_KEYS_RESET_PERIOD, :reset_log_once_keys) - - @event_loop.run - end - - # Handles the case where the server tells us to restart - - # this clears the data, clears connection attempts, and - # waits a while to reconnect. - def handle_force_restart(error) - ::NewRelic::Agent.logger.debug error.message - drop_buffered_data - @service.force_restart if @service - @connect_state = :pending - sleep 30 - end - - # when a disconnect is requested, stop the current thread, which - # is the worker thread that gathers data and talks to the - # server. - def handle_force_disconnect(error) - ::NewRelic::Agent.logger.warn "Agent received a ForceDisconnectException from the server, disconnecting. (#{error.message})" - disconnect - end - - # Handles an unknown error in the worker thread by logging - # it and disconnecting the agent, since we are now in an - # unknown state. - def handle_other_error(error) - ::NewRelic::Agent.logger.error "Unhandled error in worker thread, disconnecting." - # These errors are fatal (that is, they will prevent the agent from - # reporting entirely), so we really want backtraces when they happen - ::NewRelic::Agent.logger.log_exception(:error, error) - disconnect - end - - # a wrapper method to handle all the errors that can happen - # in the connection and worker thread system. This - # guarantees a no-throw from the background thread. - def catch_errors - yield - rescue NewRelic::Agent::ForceRestartException => e - handle_force_restart(e) - retry - rescue NewRelic::Agent::ForceDisconnectException => e - handle_force_disconnect(e) - rescue => e - handle_other_error(e) - end - - # This is the method that is run in a new thread in order to - # background the harvesting and sending of data during the - # normal operation of the agent. - # - # Takes connection options that determine how we should - # connect to the server, and loops endlessly - typically we - # never return from this method unless we're shutting down - # the agent - def deferred_work!(connection_options) - catch_errors do - NewRelic::Agent.disable_all_tracing do - connect(connection_options) - if connected? - create_and_run_event_loop - # never reaches here unless there is a problem or - # the agent is exiting - else - ::NewRelic::Agent.logger.debug "No connection. Worker thread ending." - end - end - end - end - end - include StartWorkerThread - - # Try to launch the worker thread and connect to the server. - # - # See #connect for a description of connection_options. - def start_worker_thread(connection_options = {}) - if disable = NewRelic::Agent.config[:disable_harvest_thread] - NewRelic::Agent.logger.info "Not starting Ruby Agent worker thread because :disable_harvest_thread is #{disable}" - return - end - - ::NewRelic::Agent.logger.debug "Creating Ruby Agent worker thread." - @worker_thread = Threading::AgentThread.create('Worker Loop') do - deferred_work!(connection_options) - end - end # A shorthand for NewRelic::Control.instance def control NewRelic::Control.instance end