agent.rb in newrelic_rpm-8.9.0

- old
+ new

@@ -35,10 +35,15 @@
 require 'new_relic/agent/attribute_filter'
 require 'new_relic/agent/adaptive_sampler'
 require 'new_relic/agent/connect/request_builder'
 require 'new_relic/agent/connect/response_handler'
 
+require 'new_relic/agent/agent/start_worker_thread'
+require 'new_relic/agent/agent/startup'
+require 'new_relic/agent/agent/special_startup'
+require 'new_relic/agent/agent/shutdown'
+
 module NewRelic
   module Agent
     # The Agent is a singleton that is instantiated when the plugin is
     # activated.  It collects performance data from ruby applications
     # in realtime as the application runs, and periodically sends that
@@ -46,10 +51,15 @@
     class Agent
       def self.config
         ::NewRelic::Agent.config
       end
 
+      include NewRelic::Agent::StartWorkerThread
+      include NewRelic::Agent::SpecialStartup
+      include NewRelic::Agent::Startup
+      include NewRelic::Agent::Shutdown
+
       def initialize
         @started = false
         @event_loop = nil
         @worker_thread = nil
 
@@ -220,71 +230,24 @@
             @service.shutdown
             disconnect
           end
         end
 
-        # True if we have initialized and completed 'start'
-        def started?
-          @started
-        end
-
-        # Attempt a graceful shutdown of the agent, flushing any remaining
-        # data.
-        def shutdown
-          return unless started?
-          ::NewRelic::Agent.logger.info "Starting Agent shutdown"
-
-          stop_event_loop
-          trap_signals_for_litespeed
-          untraced_graceful_disconnect
-          revert_to_default_configuration
-
-          @started = nil
-          Control.reset
-        end
-
         def revert_to_default_configuration
           Agent.config.remove_config_type(:manual)
           Agent.config.remove_config_type(:server)
         end
 
-        # If the @worker_thread encounters an error during the attempt to connect to the collector
-        # then the connect attempts enter an exponential backoff retry loop.  To avoid potential
-        # race conditions with shutting down while also attempting to reconnect, we join the
-        # @worker_thread with a timeout threshold.  This allows potentially connecting and flushing
-        # pending data to the server, but without waiting indefinitely for a reconnect to succeed.
-        # The use-case where this typically arises is in cronjob scheduled rake tasks where there's
-        # also some network stability/latency issues happening.
-        def stop_event_loop
-          @event_loop.stop if @event_loop
-          # Wait the end of the event loop thread.
-          if @worker_thread
-            unless @worker_thread.join(3)
-              ::NewRelic::Agent.logger.debug "Event loop thread did not stop within 3 seconds"
-            end
-          end
-        end
-
         def trap_signals_for_litespeed
           # if litespeed, then ignore all future SIGUSR1 - it's
           # litespeed trying to shut us down
           if Agent.config[:dispatcher] == :litespeed
             Signal.trap("SIGUSR1", "IGNORE")
             Signal.trap("SIGTERM", "IGNORE")
           end
         end
 
-        def untraced_graceful_disconnect
-          begin
-            NewRelic::Agent.disable_all_tracing do
-              graceful_disconnect
-            end
-          rescue => e
-            ::NewRelic::Agent.logger.error e
-          end
-        end
-
         # Sets a thread local variable as to whether we should or
         # should not record sql in the current thread. Returns the
         # previous value, if there is one
         def set_record_sql(should_record) # THREAD_LOCAL_ACCESS
           state = Tracer.state
@@ -305,259 +268,10 @@
         # to what it was before we pushed the current flag.
         def pop_trace_execution_flag # THREAD_LOCAL_ACCESS
           Tracer.state.pop_traced
         end
 
-        # Herein lies the corpse of the former 'start' method. May
-        # its unmatched flog score rest in pieces.
-        module Start
-          # Check whether we have already started, which is an error condition
-          def already_started?
-            if started?
-              ::NewRelic::Agent.logger.error("Agent Started Already!")
-              true
-            end
-          end
-
-          # The agent is disabled when it is not force enabled by the
-          # 'agent_enabled' option (e.g. in a manual start), or
-          # enabled normally through the configuration file
-          def disabled?
-            !Agent.config[:agent_enabled]
-          end
-
-          # Log startup information that we almost always want to know
-          def log_startup
-            log_environment
-            log_dispatcher
-            log_app_name
-          end
-
-          # Log the environment the app thinks it's running in.
-          # Useful in debugging, as this is the key for config YAML lookups.
-          def log_environment
-            ::NewRelic::Agent.logger.info "Environment: #{NewRelic::Control.instance.env}"
-          end
-
-          # Logs the dispatcher to the log file to assist with
-          # debugging. When no debugger is present, logs this fact to
-          # assist with proper dispatcher detection
-          def log_dispatcher
-            dispatcher_name = Agent.config[:dispatcher].to_s
-
-            if dispatcher_name.empty?
-              ::NewRelic::Agent.logger.info 'No known dispatcher detected.'
-            else
-              ::NewRelic::Agent.logger.info "Dispatcher: #{dispatcher_name}"
-            end
-          end
-
-          def log_app_name
-            ::NewRelic::Agent.logger.info "Application: #{Agent.config[:app_name].join(", ")}"
-          end
-
-          def log_ignore_url_regexes
-            regexes = NewRelic::Agent.config[:'rules.ignore_url_regexes']
-
-            unless regexes.empty?
-              ::NewRelic::Agent.logger.info "Ignoring URLs that match the following regexes: #{regexes.map(&:inspect).join(", ")}."
-            end
-          end
-
-          # Logs the configured application names
-          def app_name_configured?
-            names = Agent.config[:app_name]
-            return names.respond_to?(:any?) && names.any?
-          end
-
-          # Connecting in the foreground blocks further startup of the
-          # agent until we have a connection - useful in cases where
-          # you're trying to log a very-short-running process and want
-          # to get statistics from before a server connection
-          # (typically 20 seconds) exists
-          def connect_in_foreground
-            NewRelic::Agent.disable_all_tracing { connect(:keep_retrying => false) }
-          end
-
-          # This matters when the following three criteria are met:
-          #
-          # 1. A Sinatra 'classic' application is being run
-          # 2. The app is being run by executing the main file directly, rather
-          #    than via a config.ru file.
-          # 3. newrelic_rpm is required *after* sinatra
-          #
-          # In this case, the entire application runs from an at_exit handler in
-          # Sinatra, and if we were to install ours, it would be executed before
-          # the one in Sinatra, meaning that we'd shutdown the agent too early
-          # and never collect any data.
-          def sinatra_classic_app?
-            (
-              defined?(Sinatra::Application) &&
-              Sinatra::Application.respond_to?(:run) &&
-              Sinatra::Application.run?
-            )
-          end
-
-          def should_install_exit_handler?
-            return false unless Agent.config[:send_data_on_exit]
-            !sinatra_classic_app? || Agent.config[:force_install_exit_handler]
-          end
-
-          def install_exit_handler
-            return unless should_install_exit_handler?
-            NewRelic::Agent.logger.debug("Installing at_exit handler")
-            at_exit { shutdown }
-          end
-
-          # Classy logging of the agent version and the current pid,
-          # so we can disambiguate processes in the log file and make
-          # sure they're running a reasonable version
-          def log_version_and_pid
-            ::NewRelic::Agent.logger.debug "New Relic Ruby Agent #{NewRelic::VERSION::STRING} Initialized: pid = #{$$}"
-          end
-
-          # Warn the user if they have configured their agent not to
-          # send data, that way we can see this clearly in the log file
-          def monitoring?
-            if Agent.config[:monitor_mode]
-              true
-            else
-              ::NewRelic::Agent.logger.warn('Agent configured not to send data in this environment.')
-              false
-            end
-          end
-
-          # Tell the user when the license key is missing so they can
-          # fix it by adding it to the file
-          def has_license_key?
-            if Agent.config[:license_key] && Agent.config[:license_key].length > 0
-              true
-            else
-              ::NewRelic::Agent.logger.warn("No license key found. " +
-                "This often means your newrelic.yml file was not found, or it lacks a section for the running environment, '#{NewRelic::Control.instance.env}'. You may also want to try linting your newrelic.yml to ensure it is valid YML.")
-              false
-            end
-          end
-
-          # A correct license key exists and is of the proper length
-          def has_correct_license_key?
-            has_license_key? && correct_license_length
-          end
-
-          # A license key is an arbitrary 40 character string,
-          # usually looks something like a SHA1 hash
-          def correct_license_length
-            key = Agent.config[:license_key]
-
-            if key.length == 40
-              true
-            else
-              ::NewRelic::Agent.logger.error("Invalid license key: #{key}")
-              false
-            end
-          end
-
-          # If we're using a dispatcher that forks before serving
-          # requests, we need to wait until the children are forked
-          # before connecting, otherwise the parent process sends useless data
-          def using_forking_dispatcher?
-            # TODO: MAJOR VERSION - remove :rainbows
-            if [:puma, :passenger, :rainbows, :unicorn].include? Agent.config[:dispatcher]
-              ::NewRelic::Agent.logger.info "Deferring startup of agent reporting thread because #{Agent.config[:dispatcher]} may fork."
-              true
-            else
-              false
-            end
-          end
-
-          # Return true if we're using resque and it hasn't had a chance to (potentially)
-          # daemonize itself. This avoids hanging when there's a Thread started
-          # before Resque calls Process.daemon (Jira RUBY-857)
-          def defer_for_resque?
-            NewRelic::Agent.config[:dispatcher] == :resque &&
-              NewRelic::Agent::Instrumentation::Resque::Helper.resque_fork_per_job? &&
-              !PipeChannelManager.listener.started?
-          end
-
-          def in_resque_child_process?
-            defined?(@service) && @service.is_a?(PipeService)
-          end
-
-          # Sanity-check the agent configuration and start the agent,
-          # setting up the worker thread and the exit handler to shut
-          # down the agent
-          def check_config_and_start_agent
-            return unless monitoring? && has_correct_license_key?
-            return if using_forking_dispatcher?
-            setup_and_start_agent
-          end
-
-          # This is the shared method between the main agent startup and the
-          # after_fork call restarting the thread in deferred dispatchers.
-          #
-          # Treatment of @started and env report is important to get right.
-          def setup_and_start_agent(options = {})
-            @started = true
-            @harvester.mark_started
-
-            unless in_resque_child_process?
-              install_exit_handler
-              environment_for_connect
-              @harvest_samplers.load_samplers unless Agent.config[:disable_samplers]
-            end
-
-            connect_in_foreground if Agent.config[:sync_startup]
-            start_worker_thread(options)
-          end
-        end
-
-        include Start
-
-        def defer_for_delayed_job?
-          NewRelic::Agent.config[:dispatcher] == :delayed_job &&
-            !NewRelic::DelayedJobInjection.worker_name
-        end
-
-        # Check to see if the agent should start, returning +true+ if it should.
-        def agent_should_start?
-          return false if already_started? || disabled?
-
-          if defer_for_delayed_job?
-            ::NewRelic::Agent.logger.debug "Deferring startup for DelayedJob"
-            return false
-          end
-
-          if defer_for_resque?
-            ::NewRelic::Agent.logger.debug "Deferring startup for Resque in case it daemonizes"
-            return false
-          end
-
-          unless app_name_configured?
-            NewRelic::Agent.logger.error "No application name configured.",
-              "The Agent cannot start without at least one. Please check your ",
-              "newrelic.yml and ensure that it is valid and has at least one ",
-              "value set for app_name in the #{NewRelic::Control.instance.env} ",
-              "environment."
-            return false
-          end
-
-          return true
-        end
-
-        # Logs a bunch of data and starts the agent, if needed
-        def start
-          return unless agent_should_start?
-
-          log_startup
-          check_config_and_start_agent
-          log_version_and_pid
-
-          events.subscribe(:initial_configuration_complete) do
-            log_ignore_url_regexes
-          end
-        end
-
         # Clear out the metric data, errors, and transaction traces, etc.
         def drop_buffered_data
           @stats_engine.reset!
           @error_collector.drop_buffered_data
           @transaction_sampler.reset!
@@ -589,152 +303,9 @@
             transmit_log_event_data
           end
         end
 
         private
-
-        # All of this module used to be contained in the
-        # start_worker_thread method - this is an artifact of
-        # refactoring and can be moved, renamed, etc at will
-        module StartWorkerThread
-          def create_event_loop
-            EventLoop.new
-          end
-
-          LOG_ONCE_KEYS_RESET_PERIOD = 60.0
-
-          # Certain event types may sometimes need to be on the same interval as metrics,
-          # so we will check config assigned in EventHarvestConfig to determine the interval
-          # on which to report them
-          def interval_for event_type
-            interval = Agent.config[:"event_report_period.#{event_type}"]
-            :"#{interval}_second_harvest"
-          end
-
-          TRANSACTION_EVENT_DATA = "transaction_event_data".freeze
-          CUSTOM_EVENT_DATA = "custom_event_data".freeze
-          ERROR_EVENT_DATA = "error_event_data".freeze
-          SPAN_EVENT_DATA = "span_event_data".freeze
-          LOG_EVENT_DATA = "log_event_data".freeze
-
-          def create_and_run_event_loop
-            data_harvest = :"#{Agent.config[:data_report_period]}_second_harvest"
-            event_harvest = :"#{Agent.config[:event_report_period]}_second_harvest"
-
-            @event_loop = create_event_loop
-            @event_loop.on(data_harvest) do
-              transmit_data
-            end
-
-            @event_loop.on(interval_for TRANSACTION_EVENT_DATA) do
-              transmit_analytic_event_data
-            end
-            @event_loop.on(interval_for CUSTOM_EVENT_DATA) do
-              transmit_custom_event_data
-            end
-            @event_loop.on(interval_for ERROR_EVENT_DATA) do
-              transmit_error_event_data
-            end
-            @event_loop.on(interval_for SPAN_EVENT_DATA) do
-              transmit_span_event_data
-            end
-            @event_loop.on(interval_for LOG_EVENT_DATA) do
-              transmit_log_event_data
-            end
-
-            @event_loop.on(:reset_log_once_keys) do
-              ::NewRelic::Agent.logger.clear_already_logged
-            end
-            @event_loop.fire_every(Agent.config[:data_report_period], data_harvest)
-            @event_loop.fire_every(Agent.config[:event_report_period], event_harvest)
-            @event_loop.fire_every(LOG_ONCE_KEYS_RESET_PERIOD, :reset_log_once_keys)
-
-            @event_loop.run
-          end
-
-          # Handles the case where the server tells us to restart -
-          # this clears the data, clears connection attempts, and
-          # waits a while to reconnect.
-          def handle_force_restart(error)
-            ::NewRelic::Agent.logger.debug error.message
-            drop_buffered_data
-            @service.force_restart if @service
-            @connect_state = :pending
-            sleep 30
-          end
-
-          # when a disconnect is requested, stop the current thread, which
-          # is the worker thread that gathers data and talks to the
-          # server.
-          def handle_force_disconnect(error)
-            ::NewRelic::Agent.logger.warn "Agent received a ForceDisconnectException from the server, disconnecting. (#{error.message})"
-            disconnect
-          end
-
-          # Handles an unknown error in the worker thread by logging
-          # it and disconnecting the agent, since we are now in an
-          # unknown state.
-          def handle_other_error(error)
-            ::NewRelic::Agent.logger.error "Unhandled error in worker thread, disconnecting."
-            # These errors are fatal (that is, they will prevent the agent from
-            # reporting entirely), so we really want backtraces when they happen
-            ::NewRelic::Agent.logger.log_exception(:error, error)
-            disconnect
-          end
-
-          # a wrapper method to handle all the errors that can happen
-          # in the connection and worker thread system. This
-          # guarantees a no-throw from the background thread.
-          def catch_errors
-            yield
-          rescue NewRelic::Agent::ForceRestartException => e
-            handle_force_restart(e)
-            retry
-          rescue NewRelic::Agent::ForceDisconnectException => e
-            handle_force_disconnect(e)
-          rescue => e
-            handle_other_error(e)
-          end
-
-          # This is the method that is run in a new thread in order to
-          # background the harvesting and sending of data during the
-          # normal operation of the agent.
-          #
-          # Takes connection options that determine how we should
-          # connect to the server, and loops endlessly - typically we
-          # never return from this method unless we're shutting down
-          # the agent
-          def deferred_work!(connection_options)
-            catch_errors do
-              NewRelic::Agent.disable_all_tracing do
-                connect(connection_options)
-                if connected?
-                  create_and_run_event_loop
-                  # never reaches here unless there is a problem or
-                  # the agent is exiting
-                else
-                  ::NewRelic::Agent.logger.debug "No connection.  Worker thread ending."
-                end
-              end
-            end
-          end
-        end
-        include StartWorkerThread
-
-        # Try to launch the worker thread and connect to the server.
-        #
-        # See #connect for a description of connection_options.
-        def start_worker_thread(connection_options = {})
-          if disable = NewRelic::Agent.config[:disable_harvest_thread]
-            NewRelic::Agent.logger.info "Not starting Ruby Agent worker thread because :disable_harvest_thread is #{disable}"
-            return
-          end
-
-          ::NewRelic::Agent.logger.debug "Creating Ruby Agent worker thread."
-          @worker_thread = Threading::AgentThread.create('Worker Loop') do
-            deferred_work!(connection_options)
-          end
-        end
 
         # A shorthand for NewRelic::Control.instance
         def control
           NewRelic::Control.instance
         end