lib/new_relic/agent/agent.rb: newrelic_rpm-3.4.2.1 vs newrelic_rpm-3.5.0
- old (3.4.2.1)
+ new (3.5.0)
@@ -5,29 +5,29 @@
require 'zlib'
require 'stringio'
require 'new_relic/agent/new_relic_service'
require 'new_relic/agent/pipe_service'
require 'new_relic/agent/configuration/manager'
+require 'new_relic/agent/database'
module NewRelic
module Agent
# The Agent is a singleton that is instantiated when the plugin is
# activated. It collects performance data from ruby applications
# in realtime as the application runs, and periodically sends that
# data to the NewRelic server.
class Agent
extend NewRelic::Agent::Configuration::Instance
-
+
def initialize
@launch_time = Time.now
@metric_ids = {}
@stats_engine = NewRelic::Agent::StatsEngine.new
@transaction_sampler = NewRelic::Agent::TransactionSampler.new
@sql_sampler = NewRelic::Agent::SqlSampler.new
- @stats_engine.transaction_sampler = @transaction_sampler
@error_collector = NewRelic::Agent::ErrorCollector.new
@connect_attempts = 0
@last_harvest_time = Time.now
@obfuscator = lambda {|sql| NewRelic::Agent::Database.default_sql_obfuscator(sql) }
@@ -35,25 +35,36 @@
# FIXME: temporary work around for RUBY-839
if Agent.config[:monitor_mode]
@service = NewRelic::Agent::NewRelicService.new
end
+
+ txn_tracer_enabler = Proc.new do
+ if NewRelic::Agent.config[:'transaction_tracer.enabled'] ||
+ NewRelic::Agent.config[:developer_mode]
+ @stats_engine.transaction_sampler = @transaction_sampler
+ else
+ @stats_engine.transaction_sampler = nil
+ end
+ end
+ Agent.config.register_callback(:'transaction_tracer.enabled', &txn_tracer_enabler)
+ Agent.config.register_callback(:developer_mode, &txn_tracer_enabler)
end
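
Note: the 3.5.0 constructor stops attaching the transaction sampler directly and instead registers config callbacks, so toggling `transaction_tracer.enabled` or `developer_mode` attaches or detaches the sampler at runtime. A minimal standalone sketch of that callback pattern (a simplified stand-in, not the gem's Configuration::Manager):

```ruby
# Simplified model of a config manager that re-runs registered callbacks
# whenever a setting changes -- a sketch only, not NewRelic's implementation.
class MiniConfig
  def initialize(settings = {})
    @settings  = settings
    @callbacks = Hash.new { |h, k| h[k] = [] }
  end

  def [](key)
    @settings[key]
  end

  def register_callback(key, &block)
    @callbacks[key] << block
    block.call(@settings[key])          # fire once with the current value
  end

  def update(key, value)
    @settings[key] = value
    @callbacks[key].each { |cb| cb.call(value) }
  end
end

config  = MiniConfig.new(:'transaction_tracer.enabled' => false)
sampler = nil
config.register_callback(:'transaction_tracer.enabled') do |enabled|
  sampler = enabled ? :transaction_sampler : nil
end
config.update(:'transaction_tracer.enabled', true)
# sampler is now :transaction_sampler, mirroring how the agent attaches
# @transaction_sampler to the stats engine when tracing is switched on
```

The real manager presumably fires these callbacks whenever a higher-priority config source (for example, server-side settings) is applied, which is what lets the tracer follow server configuration without the removed configure_* helpers.
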
# contains all the class-level methods for NewRelic::Agent::Agent
module ClassMethods
# Should only be called by NewRelic::Control - returns a
# memoized singleton instance of the agent, creating one if needed
def instance
@instance ||= self.new
end
end
-
+
# Holds all the methods defined on NewRelic::Agent::Agent
# instances
module InstanceMethods
-
+
# holds a proc that is used to obfuscate sql statements
attr_reader :obfuscator
# the statistics engine that holds all the timeslice data
attr_reader :stats_engine
# the transaction sampler that handles recording transactions
@@ -73,34 +84,34 @@
# a configuration for the Real User Monitoring system -
# handles things like static setup of the header for inclusion
# into pages
attr_reader :beacon_configuration
attr_accessor :service
-
+
# Returns the length of the unsent errors array, if it exists,
# otherwise nil
def unsent_errors_size
@unsent_errors.length if @unsent_errors
end
-
+
# Returns the length of the traces array, if it exists,
# otherwise nil
def unsent_traces_size
@traces.length if @traces
end
-
+
# Initializes the unsent timeslice data hash, if needed, and
# returns the number of keys it contains
def unsent_timeslice_data
@unsent_timeslice_data ||= {}
@unsent_timeslice_data.keys.length
end
# fakes out a transaction that did not happen in this process
# by creating apdex, summary metrics, and recording statistics
# for the transaction
- #
+ #
# This method is *deprecated* - it may be removed in future
# versions of the agent
def record_transaction(duration_seconds, options={})
is_error = options['is_error'] || options['error_message'] || options['exception']
metric = options['metric']
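
record_transaction is flagged as deprecated above; for reference, a hypothetical call based only on the option keys the method reads ('metric', 'is_error', 'error_message', 'exception') might look like:

```ruby
# Hypothetical call shapes for the deprecated record_transaction API,
# based only on the option keys read above; values are illustrative.
agent = NewRelic::Agent.instance

# record a 250 ms transaction against a named metric
agent.record_transaction(0.25, 'metric' => 'Controller/example/index')

# record a failing transaction; any of these keys marks it as an error
agent.record_transaction(0.25, 'metric'        => 'Controller/example/index',
                               'error_message' => 'something went wrong')
```
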
@@ -151,41 +162,43 @@
# * <tt>:keep_retrying => false</tt> if we try to initiate a new
# connection, this tells me to only try it once so this method returns
# quickly if there is some kind of latency with the server.
def after_fork(options={})
@forked = true
+ Agent.config.apply_config(NewRelic::Agent::Configuration::ManualSource.new(options), 1)
+
# @connected is set to false after we fail to connect or hit an error
# while connecting. It is nil if we haven't finished trying to connect,
# or if we didn't attempt a connection because this is the master process
-
+
if channel_id = options[:report_to_channel]
@service = NewRelic::Agent::PipeService.new(channel_id)
@connected_pid = $$
@metric_ids = {}
end
-
+
# log.debug "Agent received after_fork notice in #$$: [#{control.agent_enabled?}; monitor=#{control.monitor_mode?}; connected: #{@connected.inspect}; thread=#{@worker_thread.inspect}]"
return if !Agent.config[:agent_enabled] ||
!Agent.config[:monitor_mode] ||
@connected == false ||
@worker_thread && @worker_thread.alive?
- log.info "Starting the worker thread in #$$ after forking."
+ log.info "Starting the worker thread in #{$$} after forking."
# Clear out stats that are left over from parent process
reset_stats
# Don't ever check to see if this is a spawner. If we're in a forked process
# I'm pretty sure we're not also forking new instances.
start_worker_thread(options)
@stats_engine.start_sampler_thread
end
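
after_fork now layers the passed options onto the config stack via ManualSource and, when :report_to_channel is given, swaps in a PipeService so the child reports back through the parent. A hedged sketch of how a forking dispatcher might call it (the channel id is illustrative; in the real agent the parent allocates it before forking):

```ruby
# Sketch of after_fork usage from a forking dispatcher; assumes newrelic_rpm
# is loaded and the parent agent is already running. The channel id here is
# a placeholder value.
channel_id = 42

pid = fork do
  NewRelic::Agent.after_fork(:report_to_channel => channel_id,
                             :keep_retrying     => false)
  # ... child does its work; harvested data flows through the PipeService ...
end
Process.wait(pid)
```
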
-
+
def forked?
@forked
end
-
+
# True if we have initialized and completed 'start'
def started?
@started
end
@@ -320,11 +333,11 @@
log.info "Application: #{names.join(", ")}"
else
log.error 'Unable to determine application name. Please set the application name in your newrelic.yml or in a NEW_RELIC_APP_NAME environment variable.'
end
end
-
+
# Connecting in the foreground blocks further startup of the
# agent until we have a connection - useful in cases where
# you're monitoring a very short-running process and want to
# capture statistics gathered before the server connection
# (which typically takes about 20 seconds to establish) exists
@@ -390,17 +403,19 @@
end
# Warn the user if they have configured their agent not to
# send data, that way we can see this clearly in the log file
def monitoring?
- log_unless(Agent.config[:monitor_mode], :warn, "Agent configured not to send data in this environment - edit newrelic.yml to change this")
+ log_unless(Agent.config[:monitor_mode], :warn,
+ "Agent configured not to send data in this environment.")
end
# Tell the user when the license key is missing so they can
# fix it by adding it to the file
def has_license_key?
- log_unless(Agent.config[:license_key], :error, "No license key found. Please edit your newrelic.yml file and insert your license key.")
+ log_unless(Agent.config[:license_key], :warn,
+ "No license key found in newrelic.yml config.")
end
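
Both checks above delegate to log_unless, which is not shown in this diff; an assumed shape of that helper, as a standalone sketch:

```ruby
require 'logger'

# Assumed shape of the log_unless helper used above (not part of this diff):
# log the message at the given level when the condition is falsy, and return
# the condition so callers like monitoring? read as predicates.
LOG = Logger.new($stdout)

def log_unless(condition, level, message)
  LOG.send(level, message) unless condition
  condition
end

log_unless(false, :warn, "Agent configured not to send data in this environment.")
# => logs the warning and returns false
```
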
# A correct license key exists and is of the proper length
def has_correct_license_key?
has_license_key? && correct_license_length
@@ -440,11 +455,10 @@
return if already_started? || disabled?
@started = true
@local_host = determine_host
log_dispatcher
log_app_names
- config_transaction_tracer
check_config_and_start_agent
log_version_and_pid
notify_log_file_location
end
@@ -458,48 +472,27 @@
@last_harvest_time = Time.now
@launch_time = Time.now
end
private
-
+
# All of this module used to be contained in the
# start_worker_thread method - this is an artifact of
# refactoring and can be moved, renamed, etc at will
module StartWorkerThread
-
- # disable transaction sampling if disabled by the server
- # and we're not in dev mode
- def check_transaction_sampler_status
- if Agent.config[:developer_mode] || @should_send_samples
- @transaction_sampler.enable
- else
- @transaction_sampler.disable
- end
- end
-
- def check_sql_sampler_status
- # disable sql sampling if disabled by the server
- # and we're not in dev mode
- if Agent.config[:'slow_sql.enabled'] && ['raw', 'obfuscated'].include?(Agent.config[:'slow_sql.record_sql']) && Agent.config[:'transaction_tracer.enabled']
- @sql_sampler.enable
- else
- @sql_sampler.disable
- end
- end
-
# logs info about the worker loop so users can see when the
# agent actually begins running in the background
def log_worker_loop_start
- log.info "Reporting performance data every #{@report_period} seconds."
+ log.info "Reporting performance data every #{Agent.config[:data_report_period]} seconds."
log.debug "Running worker loop"
end
# Creates the worker loop and loads it with the instructions
# it should run every @report_period seconds
def create_and_run_worker_loop
@worker_loop = WorkerLoop.new
- @worker_loop.run(@report_period) do
+ @worker_loop.run(Agent.config[:data_report_period]) do
transmit_data
end
end
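
The worker loop now pulls :data_report_period from config at start time instead of using a @report_period captured from the server handshake. A simplified stand-in for the WorkerLoop contract (not the gem's implementation):

```ruby
# Minimal stand-in for the WorkerLoop pattern: run a block every `period`
# seconds until asked to stop. The real WorkerLoop also handles timing drift
# and error isolation; this sketch only shows the shape of the contract.
class MiniWorkerLoop
  def initialize
    @stopped = false
  end

  def run(period)
    until @stopped
      started = Time.now
      yield
      elapsed = Time.now - started
      sleep(period - elapsed) if elapsed < period
    end
  end

  def stop
    @stopped = true
  end
end

# loop = MiniWorkerLoop.new
# loop.run(60) { transmit_data }   # roughly what create_and_run_worker_loop does
```
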
# Handles the case where the server tells us to restart -
@@ -568,12 +561,10 @@
# the server rejected us for a licensing reason and we should
# just exit the thread. If it returns nil
# that means it didn't try to connect because we're in the master.
connect(connection_options)
if @connected
- check_transaction_sampler_status
- check_sql_sampler_status
log_worker_loop_start
create_and_run_worker_loop
# never reaches here unless there is a problem or
# the agent is exiting
else
@@ -598,11 +589,11 @@
# A shorthand for NewRelic::Control.instance
def control
NewRelic::Control.instance
end
-
+
# This module is an artifact of a refactoring of the connect
# method - all of its methods are used in that context, so it
# can be refactored at will. It should be fully tested
module Connect
# the frequency with which we should try to connect to the
@@ -729,110 +720,15 @@
def connect_to_server
log_seed_token
@service.connect(connect_settings)
end
- # Configures the error collector if the server says that we
- # are allowed to send errors. Pretty simple, and logs at
- # debug whether errors will or will not be sent.
- def configure_error_collector!(server_enabled)
- # Reinitialize the error collector
- @error_collector = NewRelic::Agent::ErrorCollector.new
- # Ask for permission to collect error data
- enabled = if error_collector.config_enabled && server_enabled
- error_collector.enabled = true
- else
- error_collector.enabled = false
- end
- log.debug "Errors will #{enabled ? '' : 'not '}be sent to the New Relic service."
- end
-
- # Random sampling is enabled based on a sample rate, which
- # is the n in "every 1/n transactions is added regardless of
- # its length".
- #
- # uses a sane default for sampling rate if the sampling rate
- # is zero, since the collector currently sends '0' as a
- # sampling rate for all accounts, which is probably for
- # legacy reasons
- def enable_random_samples!(sample_rate)
- sample_rate = 10 unless sample_rate.to_i > 0
- @transaction_sampler.random_sampling = true
- @transaction_sampler.sampling_rate = sample_rate
- log.info "Transaction sampling enabled, rate = #{@transaction_sampler.sampling_rate}"
- end
-
- # this entire method should be done on the transaction
- # sampler object, rather than here. We should pass in the
- # sampler config.
- def config_transaction_tracer
- # Reconfigure the transaction tracer
- @transaction_sampler.configure!
- @sql_sampler.configure!
- @should_send_samples = @config_should_send_samples = Agent.config[:'transaction_tracer.enabled']
- @should_send_random_samples = Agent.config[:'transaction_tracer.random_sample']
- set_sql_recording!
-
- # default to 2.0, string 'apdex_f' will turn into your
- # apdex * 4
- @slowest_transaction_threshold = Agent.config[:'transaction_tracer.transaction_threshold']
- end
-
- # Enables or disables the transaction tracer and sets its
- # options based on the options provided to the
- # method.
- def configure_transaction_tracer!(server_enabled, sample_rate)
- # Ask the server for permission to send transaction samples.
- # determined by subscription license.
- @sql_sampler.configure!
- @should_send_samples = @config_should_send_samples && server_enabled
-
- if @should_send_samples
- # I don't think this is ever true, but...
- enable_random_samples!(sample_rate) if @should_send_random_samples
-
- @transaction_sampler.slow_capture_threshold = @slowest_transaction_threshold
-
- log.debug "Transaction tracing threshold is #{@slowest_transaction_threshold} seconds."
- else
- log.debug "Transaction traces will not be sent to the New Relic service."
- end
- end
-
# apdex_f is always 4 times the apdex_t
def apdex_f
(4 * Agent.config[:apdex_t]).to_f
end
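
A quick worked example of the apdex_f relationship, using the conventional default apdex_t of 0.5 seconds:

```ruby
# apdex_f is defined as four times apdex_t; with the usual default of 0.5 s:
apdex_t = 0.5
apdex_f = (4 * apdex_t).to_f   # => 2.0 seconds
```
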
- # Sets the sql recording configuration by trying to detect
- # any attempt to disable the sql collection - 'off',
- # 'false', 'none', and friends. Otherwise, we accept 'raw',
- # and unrecognized values default to 'obfuscated'
- def set_sql_recording!
- record_sql_config = Agent.config[:'transaction_tracer.record_sql']
- case record_sql_config.to_s
- when 'off'
- @record_sql = :off
- when 'none'
- @record_sql = :off
- when 'false'
- @record_sql = :off
- when 'raw'
- @record_sql = :raw
- else
- @record_sql = :obfuscated
- end
-
- log_sql_transmission_warning?
- end
-
- # Warn the user when we are sending raw sql across the wire
- # - they should probably be using ssl when this is true
- def log_sql_transmission_warning?
- log.warn("Agent is configured to send raw SQL to the service") if @record_sql == :raw
- end
-
# Sets the collector host and connects to the server, then
# invokes the final configuration with the returned data
def query_server_for_configuration
finish_setup(connect_to_server)
end
@@ -844,36 +740,41 @@
#
# Can accommodate most arbitrary data - anything extra is
# ignored unless we say to do something with it here.
def finish_setup(config_data)
return if config_data == nil
-
+
@service.agent_id = config_data['agent_run_id'] if @service
- @report_period = config_data['data_report_period']
- @url_rules = config_data['url_rules']
- @beacon_configuration = BeaconConfiguration.new(config_data)
- if config_data['listen_to_server_config']
+ if config_data['agent_config']
log.info "Using config from server"
- log.debug "Server provided config: #{config_data.inspect}"
- server_config = NewRelic::Agent::Configuration::ServerSource.new(config_data)
- Agent.config.apply_config(server_config, 1)
end
- config_transaction_tracer
+ log.debug "Server provided config: #{config_data.inspect}"
+ server_config = NewRelic::Agent::Configuration::ServerSource.new(config_data)
+ Agent.config.apply_config(server_config, 1)
log_connection!(config_data) if @service
- configure_transaction_tracer!(config_data['collect_traces'], config_data['sample_rate'])
- configure_error_collector!(config_data['collect_errors'])
+
+ @beacon_configuration = BeaconConfiguration.new
end
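
finish_setup now feeds the whole server reply into Configuration::ServerSource and applies it at a fixed priority, which is what makes the hand-rolled configure_error_collector!/configure_transaction_tracer! methods removed above unnecessary. A simplified sketch of the layered-source idea (a stand-in, not the gem's Manager):

```ruby
# Sketch of layered configuration: a source applied at a higher level wins,
# and removing it would expose the values beneath. This mirrors the idea
# behind apply_config(ServerSource.new(config_data), 1) without reproducing
# the gem's actual Manager.
class MiniLayeredConfig
  def initialize(defaults = {})
    @layers = [defaults]          # index 0 = lowest priority
  end

  def apply(source, level)
    @layers.insert(level, source)
  end

  def [](key)
    layer = @layers.compact.reverse.find { |l| l.key?(key) }
    layer && layer[key]
  end
end

config = MiniLayeredConfig.new('transaction_tracer.enabled' => true)
config.apply({ 'transaction_tracer.enabled' => false }, 1)   # "server source"
config['transaction_tracer.enabled']   # => false, the server layer wins
```
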
-
+
# Logs when we connect to the server, for debugging purposes
# - makes sure we know if an agent has not connected
def log_connection!(config_data)
- control.log! "Connected to NewRelic Service at #{@service.collector.name}"
+ log.info "Connected to NewRelic Service at #{@service.collector.name}"
log.debug "Agent Run = #{@service.agent_id}."
log.debug "Connection data = #{config_data.inspect}"
+ if config_data['messages'] && config_data['messages'].any?
+ log_collector_messages(config_data['messages'])
+ end
end
+
+ def log_collector_messages(messages)
+ messages.each do |message|
+ log.send(message['level'].downcase.to_sym, message['message'])
+ end
+ end
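
log_collector_messages relays server-supplied messages into the agent log at the level the collector names. An illustrative payload matching the keys the method reads ('level', 'message'); the values are made up:

```ruby
# Illustrative collector reply fragment, matching the keys read by
# log_collector_messages above; real messages come from the service.
messages = [
  { 'level' => 'INFO', 'message' => 'Agent connected.' },
  { 'level' => 'WARN', 'message' => 'High security mode is enabled.' }
]

messages.each do |message|
  # downcase + to_sym turns 'WARN' into :warn, i.e. log.warn(...)
  puts "#{message['level'].downcase.to_sym} => #{message['message']}"
end
```
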
end
include Connect
# Serialize all the important data that the agent might want
@@ -1003,15 +904,20 @@
def harvest_and_send_timeslice_data
now = Time.now
NewRelic::Agent.instance.stats_engine.get_stats_no_scope('Supportability/invoke_remote').record_data_point(0.0)
NewRelic::Agent.instance.stats_engine.get_stats_no_scope('Supportability/invoke_remote/metric_data').record_data_point(0.0)
harvest_timeslice_data(now)
- # In this version of the protocol, we get back an assoc array of spec to id.
- metric_specs_and_ids = @service.metric_data(@last_harvest_time.to_f,
- now.to_f,
- @unsent_timeslice_data.values)
- metric_specs_and_ids ||= []
+ # In this version of the protocol
+ # we get back an assoc array of spec to id.
+ metric_specs_and_ids = []
+ begin
+ metric_specs_and_ids = @service.metric_data(@last_harvest_time.to_f,
+ now.to_f,
+ @unsent_timeslice_data.values)
+ rescue UnrecoverableServerException => e
+ log.debug e.message
+ end
fill_metric_id_cache(metric_specs_and_ids)
log.debug "#{now}: sent #{@unsent_timeslice_data.length} timeslices (#{@service.agent_id}) in #{Time.now - now} seconds"
# if we successfully invoked this web service, then clear the unsent message cache.
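
metric_data returns (spec, id) pairs that fill_metric_id_cache stores so later harvests can reference metrics by integer id; that method is not part of this diff, so the following is only an assumed model of the cache it fills:

```ruby
# Assumed model of the metric id cache: the collector answers a metric_data
# post with (spec, id) pairs, and later harvests can send the small integer
# id instead of the full name/scope pair.
MetricSpec = Struct.new(:name, :scope)

metric_ids = {}
specs_and_ids = [
  [{ 'name' => 'Controller/example/index', 'scope' => '' }, 101],
  [{ 'name' => 'ActiveRecord/find', 'scope' => 'Controller/example/index' }, 102]
]

specs_and_ids.each do |spec_hash, id|
  spec = MetricSpec.new(spec_hash['name'], spec_hash['scope'])
  metric_ids[spec] = id
end

metric_ids.size   # => 2
```
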
@@ -1021,23 +927,26 @@
# Fills the traces array with the harvested transactions from
# the transaction sampler, subject to the setting for slowest
# transaction threshold
def harvest_transaction_traces
- @traces = @transaction_sampler.harvest(@traces, @slowest_transaction_threshold)
+ @traces = @transaction_sampler.harvest(@traces)
@traces
end
def harvest_and_send_slowest_sql
# FIXME add the code to try to resend if our connection is down
sql_traces = @sql_sampler.harvest
unless sql_traces.empty?
log.debug "Sending (#{sql_traces.size}) sql traces"
begin
@service.sql_trace_data(sql_traces)
- rescue
- @sql_sampler.merge sql_traces
+ rescue UnrecoverableServerException => e
+ log.debug e.message
+ rescue => e
+ log.debug "Remerging SQL traces after #{e.class.name}: #{e.message}"
+ @sql_sampler.merge sql_traces
end
end
end
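
The SQL harvest now distinguishes unrecoverable server rejections (logged, payload dropped) from transient failures (payload re-merged for the next harvest). The triage pattern in isolation, with a placeholder exception standing in for the agent's UnrecoverableServerException:

```ruby
# Triaging send failures: permanent rejections are logged and the payload
# dropped; transient failures put the payload back so the next harvest can
# retry. The exception class here is a placeholder, not the gem's.
class FakeUnrecoverableServerException < StandardError; end

def send_with_triage(payload, buffer)
  yield payload
rescue FakeUnrecoverableServerException => e
  puts "dropping payload: #{e.message}"        # the server will never accept it
rescue => e
  puts "re-merging payload after #{e.class}"   # try again on the next harvest
  buffer.concat(payload)
end
```
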
# This handles getting the transaction traces and then sending
@@ -1049,26 +958,25 @@
def harvest_and_send_slowest_sample
harvest_transaction_traces
unless @traces.empty?
now = Time.now
log.debug "Sending (#{@traces.length}) transaction traces"
-
+
begin
options = { :keep_backtraces => true }
- options[:record_sql] = @record_sql unless @record_sql == :off
- if @transaction_sampler.explain_enabled
- options[:explain_sql] = @transaction_sampler.explain_threshold
+ if !(NewRelic::Agent::Database.record_sql_method == :off)
+ options[:record_sql] = NewRelic::Agent::Database.record_sql_method
end
+ if Agent.config[:'transaction_tracer.explain_enabled']
+ options[:explain_sql] = Agent.config[:'transaction_tracer.explain_threshold']
+ end
traces = @traces.collect {|trace| trace.prepare_to_send(options)}
@service.transaction_sample_data(traces)
- rescue PostTooBigException
- # we tried to send too much data, drop the first trace and
- # try again
- retry if @traces.shift
+ log.debug "Sent slowest sample (#{@service.agent_id}) in #{Time.now - now} seconds"
+ rescue UnrecoverableServerException => e
+ log.debug e.message
end
-
- log.debug "Sent slowest sample (#{@service.agent_id}) in #{Time.now - now} seconds"
end
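
The trace-sending options are now assembled straight from NewRelic::Agent::Database.record_sql_method and the transaction_tracer.explain_* settings. A sketch of that assembly with illustrative config values:

```ruby
# Sketch of how the prepare_to_send options are assembled from settings.
# The keys mirror the ones built above; the config values are illustrative.
record_sql_method = :obfuscated   # e.g. NewRelic::Agent::Database.record_sql_method
explain_enabled   = true          # transaction_tracer.explain_enabled
explain_threshold = 0.5           # transaction_tracer.explain_threshold, seconds

options = { :keep_backtraces => true }
options[:record_sql]  = record_sql_method unless record_sql_method == :off
options[:explain_sql] = explain_threshold if explain_enabled

options
# => {:keep_backtraces=>true, :record_sql=>:obfuscated, :explain_sql=>0.5}
```
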
# if we succeed sending this sample, then we don't need to keep
# the slowest sample around - it has been sent already and we
# can clear the collection and move on
@@ -1091,32 +999,34 @@
harvest_errors
if @unsent_errors && @unsent_errors.length > 0
log.debug "Sending #{@unsent_errors.length} errors"
begin
@service.error_data(@unsent_errors)
- rescue PostTooBigException
- @unsent_errors.shift
- retry
+ rescue UnrecoverableServerException => e
+ log.debug e.message
end
# if the remote invocation fails, then we never clear
# @unsent_errors, and therefore we can re-attempt to send on
# the next heartbeat. Note the error collector maxes out at
# 20 instances to prevent leakage
@unsent_errors = []
end
end
-
+
def transmit_data
log.debug "Sending data to New Relic Service"
harvest_and_send_errors
harvest_and_send_slowest_sample
harvest_and_send_slowest_sql
harvest_and_send_timeslice_data
rescue => e
retry_count ||= 0
retry_count += 1
- retry unless retry_count > 1
+ if retry_count <= 1
+ log.debug "retrying transmit_data after #{e}"
+ retry
+ end
raise e
ensure
NewRelic::Agent::Database.close_connections unless forked?
end
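
transmit_data retries exactly once, logging the exception before the retry. The retry-once idiom in isolation, showing that the counter initialized inside the rescue survives the retry:

```ruby
# The retry-once idiom used by transmit_data, in isolation: the local counter
# initialized inside the rescue (retry_count ||= 0) survives the retry, so
# the begin block runs at most twice before the error propagates.
def fragile_operation(attempts)
  attempts << Time.now
  raise "boom" if attempts.size < 2
  "ok"
end

attempts = []
begin
  fragile_operation(attempts)
rescue => e
  retry_count ||= 0
  retry_count += 1
  retry if retry_count <= 1
  raise e
end
# => "ok" on the second attempt; a second failure would re-raise
```
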
@@ -1137,17 +1047,18 @@
@service.shutdown(Time.now.to_f)
else
log.debug "This agent connected from parent process #{@connected_pid}--not sending shutdown"
end
log.debug "Graceful disconnect complete"
- rescue Timeout::Error, StandardError
+ rescue Timeout::Error, StandardError => e
+ log.debug "Error when disconnecting #{e.class.name}: #{e.message}"
end
else
log.debug "Bypassing graceful disconnect - agent not connected"
end
end
end
-
+
extend ClassMethods
include InstanceMethods
include BrowserMonitoring
end
end