lib/new_relic/agent/agent.rb in newrelic_rpm-8.8.0 vs lib/new_relic/agent/agent.rb in newrelic_rpm-8.9.0
- old
+ new
@@ -35,10 +35,15 @@
require 'new_relic/agent/attribute_filter'
require 'new_relic/agent/adaptive_sampler'
require 'new_relic/agent/connect/request_builder'
require 'new_relic/agent/connect/response_handler'
+require 'new_relic/agent/agent/start_worker_thread'
+require 'new_relic/agent/agent/startup'
+require 'new_relic/agent/agent/special_startup'
+require 'new_relic/agent/agent/shutdown'
+
module NewRelic
module Agent
# The Agent is a singleton that is instantiated when the plugin is
# activated. It collects performance data from ruby applications
# in realtime as the application runs, and periodically sends that
@@ -46,10 +51,15 @@
class Agent
# Class-level shortcut that returns ::NewRelic::Agent.config (the
# configuration object exposed by the enclosing NewRelic::Agent module).
# Methods in this class read settings through `Agent.config`.
def self.config
::NewRelic::Agent.config
end
+ include NewRelic::Agent::StartWorkerThread
+ include NewRelic::Agent::SpecialStartup
+ include NewRelic::Agent::Startup
+ include NewRelic::Agent::Shutdown
+
def initialize
@started = false
@event_loop = nil
@worker_thread = nil
@@ -220,71 +230,24 @@
@service.shutdown
disconnect
end
end
- # True if we have initialized and completed 'start'
- def started?
- @started
- end
-
- # Attempt a graceful shutdown of the agent, flushing any remaining
- # data.
- def shutdown
- return unless started?
- ::NewRelic::Agent.logger.info "Starting Agent shutdown"
-
- stop_event_loop
- trap_signals_for_litespeed
- untraced_graceful_disconnect
- revert_to_default_configuration
-
- @started = nil
- Control.reset
- end
-
# Drops the :manual and :server configuration layers (in that order) from
# the agent configuration. Returns whatever the final remove_config_type
# call returns.
def revert_to_default_configuration
  agent_config = Agent.config
  agent_config.remove_config_type(:manual)
  agent_config.remove_config_type(:server)
end
- # If the @worker_thread encounters an error during the attempt to connect to the collector
- # then the connect attempts enter an exponential backoff retry loop. To avoid potential
- # race conditions with shutting down while also attempting to reconnect, we join the
- # @worker_thread with a timeout threshold. This allows potentially connecting and flushing
- # pending data to the server, but without waiting indefinitely for a reconnect to succeed.
- # The use-case where this typically arises is in cronjob scheduled rake tasks where there's
- # also some network stability/latency issues happening.
- def stop_event_loop
- @event_loop.stop if @event_loop
- # Wait the end of the event loop thread.
- if @worker_thread
- unless @worker_thread.join(3)
- ::NewRelic::Agent.logger.debug "Event loop thread did not stop within 3 seconds"
- end
- end
- end
-
# When the configured dispatcher is :litespeed, ignore every future SIGUSR1
# and SIGTERM - per the original note, that is litespeed trying to shut us
# down. For any other dispatcher this is a no-op that returns nil.
def trap_signals_for_litespeed
  return unless Agent.config[:dispatcher] == :litespeed

  Signal.trap("SIGUSR1", "IGNORE")
  Signal.trap("SIGTERM", "IGNORE")
end
- def untraced_graceful_disconnect
- begin
- NewRelic::Agent.disable_all_tracing do
- graceful_disconnect
- end
- rescue => e
- ::NewRelic::Agent.logger.error e
- end
- end
-
# Sets a thread local variable as to whether we should or
# should not record sql in the current thread. Returns the
# previous value, if there is one
def set_record_sql(should_record) # THREAD_LOCAL_ACCESS
state = Tracer.state
@@ -305,259 +268,10 @@
# to what it was before we pushed the current flag.
def pop_trace_execution_flag # THREAD_LOCAL_ACCESS
# Delegates to pop_traced on the current Tracer.state and returns its
# result.
Tracer.state.pop_traced
end
- # Herein lies the corpse of the former 'start' method. May
- # its unmatched flog score rest in pieces.
- module Start
- # Check whether we have already started, which is an error condition
- def already_started?
- if started?
- ::NewRelic::Agent.logger.error("Agent Started Already!")
- true
- end
- end
-
- # The agent is disabled when it is not force enabled by the
- # 'agent_enabled' option (e.g. in a manual start), or
- # enabled normally through the configuration file
- def disabled?
- !Agent.config[:agent_enabled]
- end
-
- # Log startup information that we almost always want to know
- def log_startup
- log_environment
- log_dispatcher
- log_app_name
- end
-
- # Log the environment the app thinks it's running in.
- # Useful in debugging, as this is the key for config YAML lookups.
- def log_environment
- ::NewRelic::Agent.logger.info "Environment: #{NewRelic::Control.instance.env}"
- end
-
- # Logs the dispatcher to the log file to assist with
- # debugging. When no debugger is present, logs this fact to
- # assist with proper dispatcher detection
- def log_dispatcher
- dispatcher_name = Agent.config[:dispatcher].to_s
-
- if dispatcher_name.empty?
- ::NewRelic::Agent.logger.info 'No known dispatcher detected.'
- else
- ::NewRelic::Agent.logger.info "Dispatcher: #{dispatcher_name}"
- end
- end
-
- def log_app_name
- ::NewRelic::Agent.logger.info "Application: #{Agent.config[:app_name].join(", ")}"
- end
-
- def log_ignore_url_regexes
- regexes = NewRelic::Agent.config[:'rules.ignore_url_regexes']
-
- unless regexes.empty?
- ::NewRelic::Agent.logger.info "Ignoring URLs that match the following regexes: #{regexes.map(&:inspect).join(", ")}."
- end
- end
-
- # Logs the configured application names
- def app_name_configured?
- names = Agent.config[:app_name]
- return names.respond_to?(:any?) && names.any?
- end
-
- # Connecting in the foreground blocks further startup of the
- # agent until we have a connection - useful in cases where
- # you're trying to log a very-short-running process and want
- # to get statistics from before a server connection
- # (typically 20 seconds) exists
- def connect_in_foreground
- NewRelic::Agent.disable_all_tracing { connect(:keep_retrying => false) }
- end
-
- # This matters when the following three criteria are met:
- #
- # 1. A Sinatra 'classic' application is being run
- # 2. The app is being run by executing the main file directly, rather
- # than via a config.ru file.
- # 3. newrelic_rpm is required *after* sinatra
- #
- # In this case, the entire application runs from an at_exit handler in
- # Sinatra, and if we were to install ours, it would be executed before
- # the one in Sinatra, meaning that we'd shutdown the agent too early
- # and never collect any data.
- def sinatra_classic_app?
- (
- defined?(Sinatra::Application) &&
- Sinatra::Application.respond_to?(:run) &&
- Sinatra::Application.run?
- )
- end
-
- def should_install_exit_handler?
- return false unless Agent.config[:send_data_on_exit]
- !sinatra_classic_app? || Agent.config[:force_install_exit_handler]
- end
-
- def install_exit_handler
- return unless should_install_exit_handler?
- NewRelic::Agent.logger.debug("Installing at_exit handler")
- at_exit { shutdown }
- end
-
- # Classy logging of the agent version and the current pid,
- # so we can disambiguate processes in the log file and make
- # sure they're running a reasonable version
- def log_version_and_pid
- ::NewRelic::Agent.logger.debug "New Relic Ruby Agent #{NewRelic::VERSION::STRING} Initialized: pid = #{$$}"
- end
-
- # Warn the user if they have configured their agent not to
- # send data, that way we can see this clearly in the log file
- def monitoring?
- if Agent.config[:monitor_mode]
- true
- else
- ::NewRelic::Agent.logger.warn('Agent configured not to send data in this environment.')
- false
- end
- end
-
- # Tell the user when the license key is missing so they can
- # fix it by adding it to the file
- def has_license_key?
- if Agent.config[:license_key] && Agent.config[:license_key].length > 0
- true
- else
- ::NewRelic::Agent.logger.warn("No license key found. " +
- "This often means your newrelic.yml file was not found, or it lacks a section for the running environment, '#{NewRelic::Control.instance.env}'. You may also want to try linting your newrelic.yml to ensure it is valid YML.")
- false
- end
- end
-
- # A correct license key exists and is of the proper length
- def has_correct_license_key?
- has_license_key? && correct_license_length
- end
-
- # A license key is an arbitrary 40 character string,
- # usually looks something like a SHA1 hash
- def correct_license_length
- key = Agent.config[:license_key]
-
- if key.length == 40
- true
- else
- ::NewRelic::Agent.logger.error("Invalid license key: #{key}")
- false
- end
- end
-
- # If we're using a dispatcher that forks before serving
- # requests, we need to wait until the children are forked
- # before connecting, otherwise the parent process sends useless data
- def using_forking_dispatcher?
- # TODO: MAJOR VERSION - remove :rainbows
- if [:puma, :passenger, :rainbows, :unicorn].include? Agent.config[:dispatcher]
- ::NewRelic::Agent.logger.info "Deferring startup of agent reporting thread because #{Agent.config[:dispatcher]} may fork."
- true
- else
- false
- end
- end
-
- # Return true if we're using resque and it hasn't had a chance to (potentially)
- # daemonize itself. This avoids hanging when there's a Thread started
- # before Resque calls Process.daemon (Jira RUBY-857)
- def defer_for_resque?
- NewRelic::Agent.config[:dispatcher] == :resque &&
- NewRelic::Agent::Instrumentation::Resque::Helper.resque_fork_per_job? &&
- !PipeChannelManager.listener.started?
- end
-
- def in_resque_child_process?
- defined?(@service) && @service.is_a?(PipeService)
- end
-
- # Sanity-check the agent configuration and start the agent,
- # setting up the worker thread and the exit handler to shut
- # down the agent
- def check_config_and_start_agent
- return unless monitoring? && has_correct_license_key?
- return if using_forking_dispatcher?
- setup_and_start_agent
- end
-
- # This is the shared method between the main agent startup and the
- # after_fork call restarting the thread in deferred dispatchers.
- #
- # Treatment of @started and env report is important to get right.
- def setup_and_start_agent(options = {})
- @started = true
- @harvester.mark_started
-
- unless in_resque_child_process?
- install_exit_handler
- environment_for_connect
- @harvest_samplers.load_samplers unless Agent.config[:disable_samplers]
- end
-
- connect_in_foreground if Agent.config[:sync_startup]
- start_worker_thread(options)
- end
- end
-
- include Start
-
- def defer_for_delayed_job?
- NewRelic::Agent.config[:dispatcher] == :delayed_job &&
- !NewRelic::DelayedJobInjection.worker_name
- end
-
- # Check to see if the agent should start, returning +true+ if it should.
- def agent_should_start?
- return false if already_started? || disabled?
-
- if defer_for_delayed_job?
- ::NewRelic::Agent.logger.debug "Deferring startup for DelayedJob"
- return false
- end
-
- if defer_for_resque?
- ::NewRelic::Agent.logger.debug "Deferring startup for Resque in case it daemonizes"
- return false
- end
-
- unless app_name_configured?
- NewRelic::Agent.logger.error "No application name configured.",
- "The Agent cannot start without at least one. Please check your ",
- "newrelic.yml and ensure that it is valid and has at least one ",
- "value set for app_name in the #{NewRelic::Control.instance.env} ",
- "environment."
- return false
- end
-
- return true
- end
-
- # Logs a bunch of data and starts the agent, if needed
- def start
- return unless agent_should_start?
-
- log_startup
- check_config_and_start_agent
- log_version_and_pid
-
- events.subscribe(:initial_configuration_complete) do
- log_ignore_url_regexes
- end
- end
-
# Clear out the metric data, errors, and transaction traces, etc.
def drop_buffered_data
@stats_engine.reset!
@error_collector.drop_buffered_data
@transaction_sampler.reset!
@@ -589,152 +303,9 @@
transmit_log_event_data
end
end
private
-
- # All of this module used to be contained in the
- # start_worker_thread method - this is an artifact of
- # refactoring and can be moved, renamed, etc at will
- module StartWorkerThread
- def create_event_loop
- EventLoop.new
- end
-
- LOG_ONCE_KEYS_RESET_PERIOD = 60.0
-
- # Certain event types may sometimes need to be on the same interval as metrics,
- # so we will check config assigned in EventHarvestConfig to determine the interval
- # on which to report them
- def interval_for event_type
- interval = Agent.config[:"event_report_period.#{event_type}"]
- :"#{interval}_second_harvest"
- end
-
- TRANSACTION_EVENT_DATA = "transaction_event_data".freeze
- CUSTOM_EVENT_DATA = "custom_event_data".freeze
- ERROR_EVENT_DATA = "error_event_data".freeze
- SPAN_EVENT_DATA = "span_event_data".freeze
- LOG_EVENT_DATA = "log_event_data".freeze
-
- def create_and_run_event_loop
- data_harvest = :"#{Agent.config[:data_report_period]}_second_harvest"
- event_harvest = :"#{Agent.config[:event_report_period]}_second_harvest"
-
- @event_loop = create_event_loop
- @event_loop.on(data_harvest) do
- transmit_data
- end
-
- @event_loop.on(interval_for TRANSACTION_EVENT_DATA) do
- transmit_analytic_event_data
- end
- @event_loop.on(interval_for CUSTOM_EVENT_DATA) do
- transmit_custom_event_data
- end
- @event_loop.on(interval_for ERROR_EVENT_DATA) do
- transmit_error_event_data
- end
- @event_loop.on(interval_for SPAN_EVENT_DATA) do
- transmit_span_event_data
- end
- @event_loop.on(interval_for LOG_EVENT_DATA) do
- transmit_log_event_data
- end
-
- @event_loop.on(:reset_log_once_keys) do
- ::NewRelic::Agent.logger.clear_already_logged
- end
- @event_loop.fire_every(Agent.config[:data_report_period], data_harvest)
- @event_loop.fire_every(Agent.config[:event_report_period], event_harvest)
- @event_loop.fire_every(LOG_ONCE_KEYS_RESET_PERIOD, :reset_log_once_keys)
-
- @event_loop.run
- end
-
- # Handles the case where the server tells us to restart -
- # this clears the data, clears connection attempts, and
- # waits a while to reconnect.
- def handle_force_restart(error)
- ::NewRelic::Agent.logger.debug error.message
- drop_buffered_data
- @service.force_restart if @service
- @connect_state = :pending
- sleep 30
- end
-
- # when a disconnect is requested, stop the current thread, which
- # is the worker thread that gathers data and talks to the
- # server.
- def handle_force_disconnect(error)
- ::NewRelic::Agent.logger.warn "Agent received a ForceDisconnectException from the server, disconnecting. (#{error.message})"
- disconnect
- end
-
- # Handles an unknown error in the worker thread by logging
- # it and disconnecting the agent, since we are now in an
- # unknown state.
- def handle_other_error(error)
- ::NewRelic::Agent.logger.error "Unhandled error in worker thread, disconnecting."
- # These errors are fatal (that is, they will prevent the agent from
- # reporting entirely), so we really want backtraces when they happen
- ::NewRelic::Agent.logger.log_exception(:error, error)
- disconnect
- end
-
- # a wrapper method to handle all the errors that can happen
- # in the connection and worker thread system. This
- # guarantees a no-throw from the background thread.
- def catch_errors
- yield
- rescue NewRelic::Agent::ForceRestartException => e
- handle_force_restart(e)
- retry
- rescue NewRelic::Agent::ForceDisconnectException => e
- handle_force_disconnect(e)
- rescue => e
- handle_other_error(e)
- end
-
- # This is the method that is run in a new thread in order to
- # background the harvesting and sending of data during the
- # normal operation of the agent.
- #
- # Takes connection options that determine how we should
- # connect to the server, and loops endlessly - typically we
- # never return from this method unless we're shutting down
- # the agent
- def deferred_work!(connection_options)
- catch_errors do
- NewRelic::Agent.disable_all_tracing do
- connect(connection_options)
- if connected?
- create_and_run_event_loop
- # never reaches here unless there is a problem or
- # the agent is exiting
- else
- ::NewRelic::Agent.logger.debug "No connection. Worker thread ending."
- end
- end
- end
- end
- end
- include StartWorkerThread
-
- # Try to launch the worker thread and connect to the server.
- #
- # See #connect for a description of connection_options.
- def start_worker_thread(connection_options = {})
- if disable = NewRelic::Agent.config[:disable_harvest_thread]
- NewRelic::Agent.logger.info "Not starting Ruby Agent worker thread because :disable_harvest_thread is #{disable}"
- return
- end
-
- ::NewRelic::Agent.logger.debug "Creating Ruby Agent worker thread."
- @worker_thread = Threading::AgentThread.create('Worker Loop') do
- deferred_work!(connection_options)
- end
- end
# A shorthand for NewRelic::Control.instance. Takes no arguments and
# returns whatever NewRelic::Control.instance returns on each call.
def control
NewRelic::Control.instance
end