lib/new_relic/agent/agent.rb in newrelic_rpm-3.6.7.159 vs lib/new_relic/agent/agent.rb in newrelic_rpm-3.6.8.164
- old
+ new
@@ -13,11 +13,10 @@
require 'new_relic/agent/new_relic_service'
require 'new_relic/agent/pipe_service'
require 'new_relic/agent/configuration/manager'
require 'new_relic/agent/database'
require 'new_relic/agent/commands/agent_command_router'
-require 'new_relic/agent/commands/thread_profiler'
require 'new_relic/agent/event_listener'
require 'new_relic/agent/cross_app_monitor'
require 'new_relic/agent/request_sampler'
require 'new_relic/agent/sampler_collection'
require 'new_relic/environment_report'
@@ -43,12 +42,11 @@
@events = NewRelic::Agent::EventListener.new
@stats_engine = NewRelic::Agent::StatsEngine.new
@transaction_sampler = NewRelic::Agent::TransactionSampler.new
@sql_sampler = NewRelic::Agent::SqlSampler.new
- @thread_profiler = NewRelic::Agent::Commands::ThreadProfiler.new
- @agent_command_router = NewRelic::Agent::Commands::AgentCommandRouter.new(@thread_profiler)
+ @agent_command_router = NewRelic::Agent::Commands::AgentCommandRouter.new(@events)
@cross_app_monitor = NewRelic::Agent::CrossAppMonitor.new(@events)
@error_collector = NewRelic::Agent::ErrorCollector.new
@transaction_rules = NewRelic::Agent::RulesEngine.new
@metric_rules = NewRelic::Agent::RulesEngine.new
@request_sampler = NewRelic::Agent::RequestSampler.new(@events)
@@ -57,10 +55,11 @@
@connect_state = :pending
@connect_attempts = 0
@environment_report = nil
@last_harvest_time = Time.now
+ @harvest_lock = Mutex.new
@obfuscator = lambda {|sql| NewRelic::Agent::Database.default_sql_obfuscator(sql) }
end
# contains all the class-level methods for NewRelic::Agent::Agent
module ClassMethods
@@ -80,12 +79,11 @@
# the statistics engine that holds all the timeslice data
attr_reader :stats_engine
# the transaction sampler that handles recording transactions
attr_reader :transaction_sampler
attr_reader :sql_sampler
- # begins a thread profile session when instructed by agent commands
- attr_reader :thread_profiler
+ # manages agent commands we receive from the collector, and the handlers
attr_reader :agent_command_router
# error collector is a simple collection of recorded errors
attr_reader :error_collector
attr_reader :harvest_samplers
# whether we should record raw, obfuscated, or no sql
@@ -107,10 +105,11 @@
# Transaction and metric renaming rules as provided by the
# collector on connect. The former are applied during txns,
# the latter during harvest.
attr_reader :transaction_rules
attr_reader :metric_rules
+ attr_reader :harvest_lock
# Returns the length of the unsent errors array, if it exists,
# otherwise nil
def unsent_errors_size
@unsent_errors.length if @unsent_errors
@@ -555,26 +554,16 @@
def log_worker_loop_start
::NewRelic::Agent.logger.debug "Reporting performance data every #{Agent.config[:data_report_period]} seconds."
::NewRelic::Agent.logger.debug "Running worker loop"
end
- # Accessor for the harvest lock
- def harvest_lock
- return nil if @worker_loop.nil?
- @worker_loop.lock
- end
-
# Synchronize with the harvest loop. If the harvest thread has taken
# a lock (DNS lookups, backticks, agent-owned locks, etc), and we
# fork while locked, this can deadlock child processes. For more
# details, see https://github.com/resque/resque/issues/1101
def synchronize_with_harvest
- if harvest_lock
- harvest_lock.synchronize do
- yield
- end
- else
+ harvest_lock.synchronize do
yield
end
end
# Some forking cases (like Resque) end up with harvest lock from the
@@ -620,13 +609,16 @@
disconnect
end
# Handles an unknown error in the worker thread by logging
# it and disconnecting the agent, since we are now in an
- # unknown state
+ # unknown state.
def handle_other_error(error)
- ::NewRelic::Agent.logger.error "Terminating worker loop.", error
+ ::NewRelic::Agent.logger.error "Unhandled error in worker thread, disconnecting this agent process:"
+ # These errors are fatal (that is, they will prevent the agent from
+ # reporting entirely), so we really want backtraces when they happen
+ ::NewRelic::Agent.logger.log_exception(:error, error)
disconnect
end
# a wrapper method to handle all the errors that can happen
# in the connection and worker thread system. This
@@ -919,20 +911,22 @@
@connect_state = :connected
rescue NewRelic::Agent::LicenseException => e
handle_license_error(e)
rescue NewRelic::Agent::UnrecoverableAgentException => e
handle_unrecoverable_agent_error(e)
- rescue Timeout::Error, StandardError => e
+ rescue Timeout::Error => e
log_error(e)
if opts[:keep_retrying]
note_connect_failure
::NewRelic::Agent.logger.warn "Will re-attempt in #{connect_retry_period} seconds"
sleep connect_retry_period
retry
else
disconnect
end
+ rescue StandardError => e
+ handle_other_error(e)
end
# Who am I? Well, this method can tell you your hostname.
def determine_host
Socket.gethostname
@@ -982,11 +976,10 @@
# Fills the traces array with the harvested transactions from
# the transaction sampler, subject to the setting for slowest
# transaction threshold
def harvest_transaction_traces
@traces = @transaction_sampler.harvest(@traces)
- @traces
end
def harvest_and_send_slowest_sql
# FIXME add the code to try to resend if our connection is down
sql_traces = @sql_sampler.harvest
@@ -1043,18 +1036,14 @@
@service.transaction_sample_data(traces)
::NewRelic::Agent.logger.debug "Sent slowest sample (#{@service.agent_id}) in #{Time.now - start_time} seconds"
end
- def harvest_and_send_thread_profile(disconnecting=false)
- @thread_profiler.stop(true) if disconnecting
-
- if @thread_profiler.finished?
- profile = @thread_profiler.harvest
-
- ::NewRelic::Agent.logger.debug "Sending thread profile #{profile.profile_id}"
- @service.profile_data(profile)
+ def harvest_and_send_for_agent_commands(disconnecting=false)
+ data = @agent_command_router.harvest_data_to_send(disconnecting)
+ data.each do |service_method, payload|
+ @service.send(service_method, payload)
end
end
# Gets the collection of unsent errors from the error
# collector. We pass back in an existing array of errors that
@@ -1085,20 +1074,32 @@
end
end
# Fetch samples from the RequestSampler and send them.
def harvest_and_send_analytic_event_data
- samples = @request_sampler.samples
- @service.analytic_event_data(samples) unless samples.empty?
- @request_sampler.reset
+ samples = @request_sampler.harvest
+ begin
+ @service.analytic_event_data(samples) unless samples.empty?
+ rescue
+ @request_sampler.merge(samples)
+ raise
+ end
end
- def handle_agent_commands
- @agent_command_router.handle_agent_commands
+ def check_for_and_handle_agent_commands
+ @agent_command_router.check_for_and_handle_agent_commands
end
def transmit_data(disconnecting=false)
+ harvest_lock.synchronize do
+ transmit_data_already_locked(disconnecting)
+ end
+ end
+
+ # This method is expected to only be called with the harvest_lock
+ # already held
+ def transmit_data_already_locked(disconnecting)
now = Time.now
::NewRelic::Agent.logger.debug "Sending data to New Relic Service"
@events.notify(:before_harvest)
@service.session do # use http keep-alive
@@ -1106,12 +1107,12 @@
harvest_and_send_slowest_sample
harvest_and_send_slowest_sql
harvest_and_send_timeslice_data
harvest_and_send_analytic_event_data
- handle_agent_commands
- harvest_and_send_thread_profile(disconnecting)
+ check_for_and_handle_agent_commands
+ harvest_and_send_for_agent_commands(disconnecting)
end
rescue EOFError => e
::NewRelic::Agent.logger.warn("EOFError after #{Time.now - now}s when transmitting data to New Relic Service.")
::NewRelic::Agent.logger.debug(e)
rescue => e
@@ -1125,9 +1126,11 @@
ensure
NewRelic::Agent::Database.close_connections
duration = (Time.now - now).to_f
@stats_engine.record_metrics('Supportability/Harvest', duration)
end
+
+ private :transmit_data_already_locked
# This method contacts the server to send remaining data and
# let the server know that the agent is shutting down - this
# allows us to do things like accurately set the end of the
# lifetime of the process