lib/new_relic/agent/agent.rb in newrelic_rpm-3.9.6.257 vs lib/new_relic/agent/agent.rb in newrelic_rpm-3.9.7.266

- old (newrelic_rpm-3.9.6.257)
+ new (newrelic_rpm-3.9.7.266)

@@ -17,11 +17,13 @@
 require 'new_relic/agent/configuration/manager'
 require 'new_relic/agent/database'
 require 'new_relic/agent/commands/agent_command_router'
 require 'new_relic/agent/event_listener'
 require 'new_relic/agent/cross_app_monitor'
-require 'new_relic/agent/request_sampler'
+require 'new_relic/agent/synthetics_monitor'
+require 'new_relic/agent/transaction_event_aggregator'
+require 'new_relic/agent/custom_event_aggregator'
 require 'new_relic/agent/sampler_collection'
 require 'new_relic/agent/javascript_instrumentor'
 require 'new_relic/agent/vm/monotonic_gc_profiler'
 require 'new_relic/environment_report'
@@ -46,18 +48,23 @@
         @stats_engine = NewRelic::Agent::StatsEngine.new
         @transaction_sampler = NewRelic::Agent::TransactionSampler.new
         @sql_sampler = NewRelic::Agent::SqlSampler.new
         @agent_command_router = NewRelic::Agent::Commands::AgentCommandRouter.new(@events)
         @cross_app_monitor = NewRelic::Agent::CrossAppMonitor.new(@events)
+        @synthetics_monitor = NewRelic::Agent::SyntheticsMonitor.new(@events)
         @error_collector = NewRelic::Agent::ErrorCollector.new
         @transaction_rules = NewRelic::Agent::RulesEngine.new
-        @request_sampler = NewRelic::Agent::RequestSampler.new(@events)
         @harvest_samplers = NewRelic::Agent::SamplerCollection.new(@events)
         @javascript_instrumentor = NewRelic::Agent::JavascriptInstrumentor.new(@events)
-        @harvester = NewRelic::Agent::Harvester.new(@events)
         @monotonic_gc_profiler = NewRelic::Agent::VM::MonotonicGCProfiler.new
+        @harvester = NewRelic::Agent::Harvester.new(@events)
+        @after_fork_lock = Mutex.new
+
+        @transaction_event_aggregator = NewRelic::Agent::TransactionEventAggregator.new(@events)
+        @custom_event_aggregator = NewRelic::Agent::CustomEventAggregator.new
+
         @connect_state = :pending
         @connect_attempts = 0
         @environment_report = nil
         @harvest_lock = Mutex.new
@@ -110,10 +117,11 @@
       # Responsbile for restarting the harvest thread
       attr_reader :harvester
       attr_reader :harvest_lock
       # GC::Profiler.total_time is not monotonic so we wrap it.
       attr_reader :monotonic_gc_profiler
+      attr_reader :custom_event_aggregator

       # This method should be called in a forked process after a fork.
       # It assumes the parent process initialized the agent, but does
       # not assume the agent started.
       #
@@ -131,39 +139,44 @@
       # had not connected.
       # * <tt>:keep_retrying => false</tt> if we try to initiate a new
       #   connection, this tells me to only try it once so this method returns
       #   quickly if there is some kind of latency with the server.
       def after_fork(options={})
-        # Mark started early because if we've explicitly called after_fork,
-        # we should be ready to run and shouldn't restarting if we can't.
-        @harvester.mark_started
-
-        if channel_id = options[:report_to_channel]
-          @service = NewRelic::Agent::PipeService.new(channel_id)
-          if connected?
-            @connected_pid = Process.pid
-          else
-            ::NewRelic::Agent.logger.debug("Child process #{Process.pid} not reporting to non-connected parent (process #{Process.ppid}).")
-            @service.shutdown(Time.now)
-            disconnect
-          end
+        needs_restart = false
+        @after_fork_lock.synchronize do
+          needs_restart = @harvester.needs_restart?
+          @harvester.mark_started
         end

-        return if !Agent.config[:agent_enabled] ||
+        return if !needs_restart ||
+          !Agent.config[:agent_enabled] ||
           !Agent.config[:monitor_mode] ||
-          disconnected? ||
-          @worker_thread && @worker_thread.alive?
+          disconnected?

         ::NewRelic::Agent.logger.debug "Starting the worker thread in #{Process.pid} (parent #{Process.ppid}) after forking."

+        channel_id = options[:report_to_channel]
+        install_pipe_service(channel_id) if channel_id
+
         # Clear out locks and stats left over from parent process
         reset_objects_with_locks
         drop_buffered_data

         setup_and_start_agent(options)
       end

+      def install_pipe_service(channel_id)
+        @service = NewRelic::Agent::PipeService.new(channel_id)
+        if connected?
+          @connected_pid = Process.pid
+        else
+          ::NewRelic::Agent.logger.debug("Child process #{Process.pid} not reporting to non-connected parent (process #{Process.ppid}).")
+          @service.shutdown(Time.now)
+          disconnect
+        end
+      end
+
       # True if we have initialized and completed 'start'
       def started?
         @started
       end
@@ -512,11 +525,12 @@
       # Clear out the metric data, errors, and transaction traces, etc.
       def drop_buffered_data
         @stats_engine.reset!
         @error_collector.reset!
         @transaction_sampler.reset!
-        @request_sampler.reset!
+        @transaction_event_aggregator.reset!
+        @custom_event_aggregator.reset!
         @sql_sampler.reset!
       end

       # Deprecated, and not part of the public API, but here for backwards
       # compatibility because some 3rd-party gems call it.
@@ -529,10 +543,17 @@
       def reset_objects_with_locks
         @stats_engine = NewRelic::Agent::StatsEngine.new
         reset_harvest_locks
       end

+      def flush_pipe_data
+        if connected? && @service.is_a?(::NewRelic::Agent::PipeService)
+          transmit_data
+          transmit_event_data
+        end
+      end
+
       private

       # All of this module used to be contained in the
       # start_worker_thread method - this is an artifact of
       # refactoring and can be moved, renamed, etc at will
@@ -575,20 +596,26 @@
           period = MIN_ALLOWED_REPORT_PERIOD
         end
         period
       end

+      LOG_ONCE_KEYS_RESET_PERIOD = 60.0
+
       def create_and_run_event_loop
         @event_loop = create_event_loop
         @event_loop.on(:report_data) do
           transmit_data
         end
         @event_loop.on(:report_event_data) do
           transmit_event_data
         end
+        @event_loop.on(:reset_log_once_keys) do
+          ::NewRelic::Agent.logger.clear_already_logged
+        end

         @event_loop.fire_every(Agent.config[:data_report_period], :report_data)
         @event_loop.fire_every(report_period_for(:analytic_event_data), :report_event_data)
+        @event_loop.fire_every(LOG_ONCE_KEYS_RESET_PERIOD, :reset_log_once_keys)

         @event_loop.run
       end

       # Handles the case where the server tells us to restart -
       # this clears the data, clears connection attempts, and
@@ -852,11 +879,11 @@
       def container_for_endpoint(endpoint)
         case endpoint
         when :metric_data             then @stats_engine
         when :transaction_sample_data then @transaction_sampler
         when :error_data              then @error_collector
-        when :analytic_event_data     then @request_sampler
+        when :analytic_event_data     then @transaction_event_aggregator
         when :sql_trace_data          then @sql_sampler
         end
       end

       def merge_data_for_endpoint(endpoint, data)
@@ -976,10 +1003,13 @@
         raise
       rescue SerializationError => e
         NewRelic::Agent.logger.warn("Failed to serialize data for #{endpoint}, discarding. Error: ", e)
       rescue UnrecoverableServerException => e
         NewRelic::Agent.logger.warn("#{endpoint} data was rejected by remote service, discarding. Error: ", e)
+      rescue ServerConnectionException => e
+        log_remote_unavailable(endpoint, e)
+        container.merge!(items)
       rescue => e
         NewRelic::Agent.logger.info("Unable to send #{endpoint} data, will try again later. Error: ", e)
         container.merge!(items)
       end
     end
@@ -1010,20 +1040,29 @@
       def harvest_and_send_errors
         harvest_and_send_from_container(@error_collector, :error_data)
       end

       def harvest_and_send_analytic_event_data
-        harvest_and_send_from_container(@request_sampler, :analytic_event_data)
+        harvest_and_send_from_container(@transaction_event_aggregator, :analytic_event_data)
+        harvest_and_send_from_container(@custom_event_aggregator, :analytic_event_data)
       end

       def check_for_and_handle_agent_commands
         begin
           @agent_command_router.check_for_and_handle_agent_commands
         rescue ForceRestartException, ForceDisconnectException
           raise
+        rescue ServerConnectionException => e
+          log_remote_unavailable(:get_agent_commands, e)
         rescue => e
           NewRelic::Agent.logger.info("Error during check_for_and_handle_agent_commands, will retry later: ", e)
         end
+      end
+
+      def log_remote_unavailable(endpoint, e)
+        NewRelic::Agent.logger.debug("Unable to send #{endpoint} data, will try again later. Error: ", e)
+        NewRelic::Agent.record_metric("Supportability/remote_unavailable", 0.0)
+        NewRelic::Agent.record_metric("Supportability/remote_unavailable/#{endpoint.to_s}", 0.0)
       end

       def transmit_data
         harvest_lock.synchronize do
           transmit_data_already_locked