lib/new_relic/agent/agent.rb in newrelic_rpm-3.6.7.159 vs lib/new_relic/agent/agent.rb in newrelic_rpm-3.6.8.164

- old
+ new

@@ -13,11 +13,10 @@ require 'new_relic/agent/new_relic_service' require 'new_relic/agent/pipe_service' require 'new_relic/agent/configuration/manager' require 'new_relic/agent/database' require 'new_relic/agent/commands/agent_command_router' -require 'new_relic/agent/commands/thread_profiler' require 'new_relic/agent/event_listener' require 'new_relic/agent/cross_app_monitor' require 'new_relic/agent/request_sampler' require 'new_relic/agent/sampler_collection' require 'new_relic/environment_report' @@ -43,12 +42,11 @@ @events = NewRelic::Agent::EventListener.new @stats_engine = NewRelic::Agent::StatsEngine.new @transaction_sampler = NewRelic::Agent::TransactionSampler.new @sql_sampler = NewRelic::Agent::SqlSampler.new - @thread_profiler = NewRelic::Agent::Commands::ThreadProfiler.new - @agent_command_router = NewRelic::Agent::Commands::AgentCommandRouter.new(@thread_profiler) + @agent_command_router = NewRelic::Agent::Commands::AgentCommandRouter.new(@events) @cross_app_monitor = NewRelic::Agent::CrossAppMonitor.new(@events) @error_collector = NewRelic::Agent::ErrorCollector.new @transaction_rules = NewRelic::Agent::RulesEngine.new @metric_rules = NewRelic::Agent::RulesEngine.new @request_sampler = NewRelic::Agent::RequestSampler.new(@events) @@ -57,10 +55,11 @@ @connect_state = :pending @connect_attempts = 0 @environment_report = nil @last_harvest_time = Time.now + @harvest_lock = Mutex.new @obfuscator = lambda {|sql| NewRelic::Agent::Database.default_sql_obfuscator(sql) } end # contains all the class-level methods for NewRelic::Agent::Agent module ClassMethods @@ -80,12 +79,11 @@ # the statistics engine that holds all the timeslice data attr_reader :stats_engine # the transaction sampler that handles recording transactions attr_reader :transaction_sampler attr_reader :sql_sampler - # begins a thread profile session when instructed by agent commands - attr_reader :thread_profiler + # manages agent commands we receive from the collector, and the handlers attr_reader :agent_command_router # error collector is a simple collection of recorded errors attr_reader :error_collector attr_reader :harvest_samplers # whether we should record raw, obfuscated, or no sql @@ -107,10 +105,11 @@ # Transaction and metric renaming rules as provided by the # collector on connect. The former are applied during txns, # the latter during harvest. attr_reader :transaction_rules attr_reader :metric_rules + attr_reader :harvest_lock # Returns the length of the unsent errors array, if it exists, # otherwise nil def unsent_errors_size @unsent_errors.length if @unsent_errors @@ -555,26 +554,16 @@ def log_worker_loop_start ::NewRelic::Agent.logger.debug "Reporting performance data every #{Agent.config[:data_report_period]} seconds." ::NewRelic::Agent.logger.debug "Running worker loop" end - # Accessor for the harvest lock - def harvest_lock - return nil if @worker_loop.nil? - @worker_loop.lock - end - # Synchronize with the harvest loop. If the harvest thread has taken # a lock (DNS lookups, backticks, agent-owned locks, etc), and we # fork while locked, this can deadlock child processes. For more # details, see https://github.com/resque/resque/issues/1101 def synchronize_with_harvest - if harvest_lock - harvest_lock.synchronize do - yield - end - else + harvest_lock.synchronize do yield end end # Some forking cases (like Resque) end up with harvest lock from the @@ -620,13 +609,16 @@ disconnect end # Handles an unknown error in the worker thread by logging # it and disconnecting the agent, since we are now in an - # unknown state + # unknown state. def handle_other_error(error) - ::NewRelic::Agent.logger.error "Terminating worker loop.", error + ::NewRelic::Agent.logger.error "Unhandled error in worker thread, disconnecting this agent process:" + # These errors are fatal (that is, they will prevent the agent from + # reporting entirely), so we really want backtraces when they happen + ::NewRelic::Agent.logger.log_exception(:error, error) disconnect end # a wrapper method to handle all the errors that can happen # in the connection and worker thread system. This @@ -919,20 +911,22 @@ @connect_state = :connected rescue NewRelic::Agent::LicenseException => e handle_license_error(e) rescue NewRelic::Agent::UnrecoverableAgentException => e handle_unrecoverable_agent_error(e) - rescue Timeout::Error, StandardError => e + rescue Timeout::Error => e log_error(e) if opts[:keep_retrying] note_connect_failure ::NewRelic::Agent.logger.warn "Will re-attempt in #{connect_retry_period} seconds" sleep connect_retry_period retry else disconnect end + rescue StandardError => e + handle_other_error(e) end # Who am I? Well, this method can tell you your hostname. def determine_host Socket.gethostname @@ -982,11 +976,10 @@ # Fills the traces array with the harvested transactions from # the transaction sampler, subject to the setting for slowest # transaction threshold def harvest_transaction_traces @traces = @transaction_sampler.harvest(@traces) - @traces end def harvest_and_send_slowest_sql # FIXME add the code to try to resend if our connection is down sql_traces = @sql_sampler.harvest @@ -1043,18 +1036,14 @@ @service.transaction_sample_data(traces) ::NewRelic::Agent.logger.debug "Sent slowest sample (#{@service.agent_id}) in #{Time.now - start_time} seconds" end - def harvest_and_send_thread_profile(disconnecting=false) - @thread_profiler.stop(true) if disconnecting - - if @thread_profiler.finished? - profile = @thread_profiler.harvest - - ::NewRelic::Agent.logger.debug "Sending thread profile #{profile.profile_id}" - @service.profile_data(profile) + def harvest_and_send_for_agent_commands(disconnecting=false) + data = @agent_command_router.harvest_data_to_send(disconnecting) + data.each do |service_method, payload| + @service.send(service_method, payload) end end # Gets the collection of unsent errors from the error # collector. We pass back in an existing array of errors that @@ -1085,20 +1074,32 @@ end end # Fetch samples from the RequestSampler and send them. def harvest_and_send_analytic_event_data - samples = @request_sampler.samples - @service.analytic_event_data(samples) unless samples.empty? - @request_sampler.reset + samples = @request_sampler.harvest + begin + @service.analytic_event_data(samples) unless samples.empty? + rescue + @request_sampler.merge(samples) + raise + end end - def handle_agent_commands - @agent_command_router.handle_agent_commands + def check_for_and_handle_agent_commands + @agent_command_router.check_for_and_handle_agent_commands end def transmit_data(disconnecting=false) + harvest_lock.synchronize do + transmit_data_already_locked(disconnecting) + end + end + + # This method is expected to only be called with the harvest_lock + # already held + def transmit_data_already_locked(disconnecting) now = Time.now ::NewRelic::Agent.logger.debug "Sending data to New Relic Service" @events.notify(:before_harvest) @service.session do # use http keep-alive @@ -1106,12 +1107,12 @@ harvest_and_send_slowest_sample harvest_and_send_slowest_sql harvest_and_send_timeslice_data harvest_and_send_analytic_event_data - handle_agent_commands - harvest_and_send_thread_profile(disconnecting) + check_for_and_handle_agent_commands + harvest_and_send_for_agent_commands(disconnecting) end rescue EOFError => e ::NewRelic::Agent.logger.warn("EOFError after #{Time.now - now}s when transmitting data to New Relic Service.") ::NewRelic::Agent.logger.debug(e) rescue => e @@ -1125,9 +1126,11 @@ ensure NewRelic::Agent::Database.close_connections duration = (Time.now - now).to_f @stats_engine.record_metrics('Supportability/Harvest', duration) end + + private :transmit_data_already_locked # This method contacts the server to send remaining data and # let the server know that the agent is shutting down - this # allows us to do things like accurately set the end of the # lifetime of the process