lib/new_relic/agent/agent.rb: newrelic_rpm-3.4.2.1 vs newrelic_rpm-3.5.0
- old (3.4.2.1)
+ new (3.5.0)
@@ -5,29 +5,29 @@
require 'zlib'
require 'stringio'
require 'new_relic/agent/new_relic_service'
require 'new_relic/agent/pipe_service'
require 'new_relic/agent/configuration/manager'
+require 'new_relic/agent/database'
module NewRelic
module Agent
# The Agent is a singleton that is instantiated when the plugin is
# activated. It collects performance data from ruby applications
# in realtime as the application runs, and periodically sends that
# data to the NewRelic server.
class Agent
extend NewRelic::Agent::Configuration::Instance
-
+
def initialize
@launch_time = Time.now
@metric_ids = {}
@stats_engine = NewRelic::Agent::StatsEngine.new
@transaction_sampler = NewRelic::Agent::TransactionSampler.new
@sql_sampler = NewRelic::Agent::SqlSampler.new
- @stats_engine.transaction_sampler = @transaction_sampler
@error_collector = NewRelic::Agent::ErrorCollector.new
@connect_attempts = 0
@last_harvest_time = Time.now
@obfuscator = lambda {|sql| NewRelic::Agent::Database.default_sql_obfuscator(sql) }
@@ -35,25 +35,36 @@
# FIXME: temporary work around for RUBY-839
if Agent.config[:monitor_mode]
@service = NewRelic::Agent::NewRelicService.new
end
+
+ txn_tracer_enabler = Proc.new do
+ if NewRelic::Agent.config[:'transaction_tracer.enabled'] ||
+ NewRelic::Agent.config[:developer_mode]
+ @stats_engine.transaction_sampler = @transaction_sampler
+ else
+ @stats_engine.transaction_sampler = nil
+ end
+ end
+ Agent.config.register_callback(:'transaction_tracer.enabled', &txn_tracer_enabler)
+ Agent.config.register_callback(:developer_mode, &txn_tracer_enabler)
end
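
Note: the 3.5.0 constructor stops attaching the transaction sampler directly and instead registers config callbacks, so toggling `transaction_tracer.enabled` or `developer_mode` attaches or detaches the sampler at runtime. A minimal standalone sketch of that callback pattern (a simplified stand-in, not the gem's Configuration::Manager):

```ruby
# Simplified model of a config manager that re-runs registered callbacks
# whenever a setting changes -- a sketch only, not NewRelic's implementation.
class MiniConfig
  def initialize(settings = {})
    @settings  = settings
    @callbacks = Hash.new { |h, k| h[k] = [] }
  end

  def [](key)
    @settings[key]
  end

  def register_callback(key, &block)
    @callbacks[key] << block
    block.call(@settings[key])          # fire once with the current value
  end

  def update(key, value)
    @settings[key] = value
    @callbacks[key].each { |cb| cb.call(value) }
  end
end

config  = MiniConfig.new(:'transaction_tracer.enabled' => false)
sampler = nil
config.register_callback(:'transaction_tracer.enabled') do |enabled|
  sampler = enabled ? :transaction_sampler : nil
end
config.update(:'transaction_tracer.enabled', true)
# sampler is now :transaction_sampler, mirroring how the agent attaches
# @transaction_sampler to the stats engine when tracing is switched on
```

The real manager presumably fires these callbacks whenever a higher-priority config source (for example, server-side settings) is applied, which is what lets the tracer follow server configuration without the removed configure_* helpers.
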
# contains all the class-level methods for NewRelic::Agent::Agent
module ClassMethods
# Should only be called by NewRelic::Control - returns a
# memoized singleton instance of the agent, creating one if needed
def instance
@instance ||= self.new
end
end
-
+
# Holds all the methods defined on NewRelic::Agent::Agent
# instances
module InstanceMethods
-
+
# holds a proc that is used to obfuscate sql statements
attr_reader :obfuscator
# the statistics engine that holds all the timeslice data
attr_reader :stats_engine
# the transaction sampler that handles recording transactions
@@ -73,34 +84,34 @@
# a configuration for the Real User Monitoring system -
# handles things like static setup of the header for inclusion
# into pages
attr_reader :beacon_configuration
attr_accessor :service
-
+
# Returns the length of the unsent errors array, if it exists,
# otherwise nil
def unsent_errors_size
@unsent_errors.length if @unsent_errors
end
-
+
# Returns the length of the traces array, if it exists,
# otherwise nil
def unsent_traces_size
@traces.length if @traces
end
-
+
# Initializes the unsent timeslice data hash, if needed, and
# returns the number of keys it contains
def unsent_timeslice_data
@unsent_timeslice_data ||= {}
@unsent_timeslice_data.keys.length
end
# fakes out a transaction that did not happen in this process
# by creating apdex, summary metrics, and recording statistics
# for the transaction
- #
+ #
# This method is *deprecated* - it may be removed in future
# versions of the agent
def record_transaction(duration_seconds, options={})
is_error = options['is_error'] || options['error_message'] || options['exception']
metric = options['metric']
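
record_transaction is flagged as deprecated above; for reference, a hypothetical call based only on the option keys the method reads ('metric', 'is_error', 'error_message', 'exception') might look like:

```ruby
# Hypothetical call shapes for the deprecated record_transaction API,
# based only on the option keys read above; values are illustrative.
agent = NewRelic::Agent.instance

# record a 250 ms transaction against a named metric
agent.record_transaction(0.25, 'metric' => 'Controller/example/index')

# record a failing transaction; any of these keys marks it as an error
agent.record_transaction(0.25, 'metric'        => 'Controller/example/index',
                               'error_message' => 'something went wrong')
```
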
@@ -151,41 +162,43 @@
# * <tt>:keep_retrying => false</tt> if we try to initiate a new
# connection, this tells me to only try it once so this method returns
# quickly if there is some kind of latency with the server.
def after_fork(options={})
@forked = true
+ Agent.config.apply_config(NewRelic::Agent::Configuration::ManualSource.new(options), 1)
+
# @connected is set to false after we fail to connect or hit an error
# while connecting. It is nil if we haven't finished trying to connect,
# or if we didn't attempt a connection because this is the master process
-
+
if channel_id = options[:report_to_channel]
@service = NewRelic::Agent::PipeService.new(channel_id)
@connected_pid = $$
@metric_ids = {}
end
-
+
# log.debug "Agent received after_fork notice in #$$: [#{control.agent_enabled?}; monitor=#{control.monitor_mode?}; connected: #{@connected.inspect}; thread=#{@worker_thread.inspect}]"
return if !Agent.config[:agent_enabled] ||
!Agent.config[:monitor_mode] ||
@connected == false ||
@worker_thread && @worker_thread.alive?
- log.info "Starting the worker thread in #$$ after forking."
+ log.info "Starting the worker thread in #{$$} after forking."
# Clear out stats that are left over from parent process
reset_stats
# Don't ever check to see if this is a spawner. If we're in a forked process
# I'm pretty sure we're not also forking new instances.
start_worker_thread(options)
@stats_engine.start_sampler_thread
end
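
after_fork now layers the passed options onto the config stack via ManualSource and, when :report_to_channel is given, swaps in a PipeService so the child reports back through the parent. A hedged sketch of how a forking dispatcher might call it (the channel id is illustrative; in the real agent the parent allocates it before forking):

```ruby
# Sketch of after_fork usage from a forking dispatcher; assumes newrelic_rpm
# is loaded and the parent agent is already running. The channel id here is
# a placeholder value.
channel_id = 42

pid = fork do
  NewRelic::Agent.after_fork(:report_to_channel => channel_id,
                             :keep_retrying     => false)
  # ... child does its work; harvested data flows through the PipeService ...
end
Process.wait(pid)
```
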
-
+
def forked?
@forked
end
-
+
# True if we have initialized and completed 'start'
def started?
@started
end
@@ -320,11 +333,11 @@
log.info "Application: #{names.join(", ")}"
else
log.error 'Unable to determine application name. Please set the application name in your newrelic.yml or in a NEW_RELIC_APP_NAME environment variable.'
end
end
-
+
# Connecting in the foreground blocks further startup of the
# agent until we have a connection - useful in cases where
# you're monitoring a very short-running process and want to
# capture statistics gathered before the server connection
# (which typically takes about 20 seconds to establish) exists
@@ -390,17 +403,19 @@
end
# Warn the user if they have configured their agent not to
# send data, that way we can see this clearly in the log file
def monitoring?
- log_unless(Agent.config[:monitor_mode], :warn, "Agent configured not to send data in this environment - edit newrelic.yml to change this")
+ log_unless(Agent.config[:monitor_mode], :warn,
+ "Agent configured not to send data in this environment.")
end
# Tell the user when the license key is missing so they can
# fix it by adding it to the file
def has_license_key?
- log_unless(Agent.config[:license_key], :error, "No license key found. Please edit your newrelic.yml file and insert your license key.")
+ log_unless(Agent.config[:license_key], :warn,
+ "No license key found in newrelic.yml config.")
end
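
Both checks above delegate to log_unless, which is not shown in this diff; an assumed shape of that helper, as a standalone sketch:

```ruby
require 'logger'

# Assumed shape of the log_unless helper used above (not part of this diff):
# log the message at the given level when the condition is falsy, and return
# the condition so callers like monitoring? read as predicates.
LOG = Logger.new($stdout)

def log_unless(condition, level, message)
  LOG.send(level, message) unless condition
  condition
end

log_unless(false, :warn, "Agent configured not to send data in this environment.")
# => logs the warning and returns false
```
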
# A correct license key exists and is of the proper length
def has_correct_license_key?
has_license_key? && correct_license_length
@@ -440,11 +455,10 @@
return if already_started? || disabled?
@started = true
@local_host = determine_host
log_dispatcher
log_app_names
- config_transaction_tracer
check_config_and_start_agent
log_version_and_pid
notify_log_file_location
end
@@ -458,48 +472,27 @@
@last_harvest_time = Time.now
@launch_time = Time.now
end
private
-
+
# All of this module used to be contained in the
# start_worker_thread method - this is an artifact of
# refactoring and can be moved, renamed, etc at will
module StartWorkerThread
-
- # disable transaction sampling if disabled by the server
- # and we're not in dev mode
- def check_transaction_sampler_status
- if Agent.config[:developer_mode] || @should_send_samples
- @transaction_sampler.enable
- else
- @transaction_sampler.disable
- end
- end
-
- def check_sql_sampler_status
- # disable sql sampling if disabled by the server
- # and we're not in dev mode
- if Agent.config[:'slow_sql.enabled'] && ['raw', 'obfuscated'].include?(Agent.config[:'slow_sql.record_sql']) && Agent.config[:'transaction_tracer.enabled']
- @sql_sampler.enable
- else
- @sql_sampler.disable
- end
- end
-
# logs info about the worker loop so users can see when the
# agent actually begins running in the background
def log_worker_loop_start
- log.info "Reporting performance data every #{@report_period} seconds."
+ log.info "Reporting performance data every #{Agent.config[:data_report_period]} seconds."
log.debug "Running worker loop"
end
# Creates the worker loop and loads it with the instructions
# it should run every @report_period seconds
def create_and_run_worker_loop
@worker_loop = WorkerLoop.new
- @worker_loop.run(@report_period) do
+ @worker_loop.run(Agent.config[:data_report_period]) do
transmit_data
end
end
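
The worker loop now pulls :data_report_period from config at start time instead of using a @report_period captured from the server handshake. A simplified stand-in for the WorkerLoop contract (not the gem's implementation):

```ruby
# Minimal stand-in for the WorkerLoop pattern: run a block every `period`
# seconds until asked to stop. The real WorkerLoop also handles timing drift
# and error isolation; this sketch only shows the shape of the contract.
class MiniWorkerLoop
  def initialize
    @stopped = false
  end

  def run(period)
    until @stopped
      started = Time.now
      yield
      elapsed = Time.now - started
      sleep(period - elapsed) if elapsed < period
    end
  end

  def stop
    @stopped = true
  end
end

# loop = MiniWorkerLoop.new
# loop.run(60) { transmit_data }   # roughly what create_and_run_worker_loop does
```
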
# Handles the case where the server tells us to restart -
@@ -568,12 +561,10 @@
# the server rejected us for a licensing reason and we should
# just exit the thread. If it returns nil
# that means it didn't try to connect because we're in the master.
connect(connection_options)
if @connected
- check_transaction_sampler_status
- check_sql_sampler_status
log_worker_loop_start
create_and_run_worker_loop
# never reaches here unless there is a problem or
# the agent is exiting
else
@@ -598,11 +589,11 @@
# A shorthand for NewRelic::Control.instance
def control
NewRelic::Control.instance
end
-
+
# This module is an artifact of a refactoring of the connect
# method - all of its methods are used in that context, so it
# can be refactored at will. It should be fully tested
module Connect
# the frequency with which we should try to connect to the
@@ -729,110 +720,15 @@
def connect_to_server
log_seed_token
@service.connect(connect_settings)
end
- # Configures the error collector if the server says that we
- # are allowed to send errors. Pretty simple, and logs at
- # debug whether errors will or will not be sent.
- def configure_error_collector!(server_enabled)
- # Reinitialize the error collector
- @error_collector = NewRelic::Agent::ErrorCollector.new
- # Ask for permission to collect error data
- enabled = if error_collector.config_enabled && server_enabled
- error_collector.enabled = true
- else
- error_collector.enabled = false
- end
- log.debug "Errors will #{enabled ? '' : 'not '}be sent to the New Relic service."
- end
-
- # Random sampling is enabled based on a sample rate, which
- # is the n in "every 1/n transactions is added regardless of
- # its length".
- #
- # uses a sane default for sampling rate if the sampling rate
- # is zero, since the collector currently sends '0' as a
- # sampling rate for all accounts, which is probably for
- # legacy reasons
- def enable_random_samples!(sample_rate)
- sample_rate = 10 unless sample_rate.to_i > 0
- @transaction_sampler.random_sampling = true
- @transaction_sampler.sampling_rate = sample_rate
- log.info "Transaction sampling enabled, rate = #{@transaction_sampler.sampling_rate}"
- end
-
- # this entire method should be done on the transaction
- # sampler object, rather than here. We should pass in the
- # sampler config.
- def config_transaction_tracer
- # Reconfigure the transaction tracer
- @transaction_sampler.configure!
- @sql_sampler.configure!
- @should_send_samples = @config_should_send_samples = Agent.config[:'transaction_tracer.enabled']
- @should_send_random_samples = Agent.config[:'transaction_tracer.random_sample']
- set_sql_recording!
-
- # default to 2.0, string 'apdex_f' will turn into your
- # apdex * 4
- @slowest_transaction_threshold = Agent.config[:'transaction_tracer.transaction_threshold']
- end
-
- # Enables or disables the transaction tracer and sets its
- # options based on the options provided to the
- # method.
- def configure_transaction_tracer!(server_enabled, sample_rate)
- # Ask the server for permission to send transaction samples.
- # determined by subscription license.
- @sql_sampler.configure!
- @should_send_samples = @config_should_send_samples && server_enabled
-
- if @should_send_samples
- # I don't think this is ever true, but...
- enable_random_samples!(sample_rate) if @should_send_random_samples
-
- @transaction_sampler.slow_capture_threshold = @slowest_transaction_threshold
-
- log.debug "Transaction tracing threshold is #{@slowest_transaction_threshold} seconds."
- else
- log.debug "Transaction traces will not be sent to the New Relic service."
- end
- end
-
# apdex_f is always 4 times the apdex_t
def apdex_f
(4 * Agent.config[:apdex_t]).to_f
end
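
A quick worked example of the apdex_f relationship, using the conventional default apdex_t of 0.5 seconds:

```ruby
# apdex_f is defined as four times apdex_t; with the usual default of 0.5 s:
apdex_t = 0.5
apdex_f = (4 * apdex_t).to_f   # => 2.0 seconds
```
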
- # Sets the sql recording configuration by trying to detect
- # any attempt to disable the sql collection - 'off',
- # 'false', 'none', and friends. Otherwise, we accept 'raw',
- # and unrecognized values default to 'obfuscated'
- def set_sql_recording!
- record_sql_config = Agent.config[:'transaction_tracer.record_sql']
- case record_sql_config.to_s
- when 'off'
- @record_sql = :off
- when 'none'
- @record_sql = :off
- when 'false'
- @record_sql = :off
- when 'raw'
- @record_sql = :raw
- else
- @record_sql = :obfuscated
- end
-
- log_sql_transmission_warning?
- end
-
- # Warn the user when we are sending raw sql across the wire
- # - they should probably be using ssl when this is true
- def log_sql_transmission_warning?
- log.warn("Agent is configured to send raw SQL to the service") if @record_sql == :raw
- end
-
# Sets the collector host and connects to the server, then
# invokes the final configuration with the returned data
def query_server_for_configuration
finish_setup(connect_to_server)
end
@@ -844,36 +740,41 @@
#
# Can accommodate most arbitrary data - anything extra is
# ignored unless we say to do something with it here.
def finish_setup(config_data)
return if config_data == nil
-
+
@service.agent_id = config_data['agent_run_id'] if @service
- @report_period = config_data['data_report_period']
- @url_rules = config_data['url_rules']
- @beacon_configuration = BeaconConfiguration.new(config_data)
- if config_data['listen_to_server_config']
+ if config_data['agent_config']
log.info "Using config from server"
- log.debug "Server provided config: #{config_data.inspect}"
- server_config = NewRelic::Agent::Configuration::ServerSource.new(config_data)
- Agent.config.apply_config(server_config, 1)
end
- config_transaction_tracer
+ log.debug "Server provided config: #{config_data.inspect}"
+ server_config = NewRelic::Agent::Configuration::ServerSource.new(config_data)
+ Agent.config.apply_config(server_config, 1)
log_connection!(config_data) if @service
- configure_transaction_tracer!(config_data['collect_traces'], config_data['sample_rate'])
- configure_error_collector!(config_data['collect_errors'])
+
+ @beacon_configuration = BeaconConfiguration.new
end
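
finish_setup now feeds the whole server reply into Configuration::ServerSource and applies it at a fixed priority, which is what makes the hand-rolled configure_error_collector!/configure_transaction_tracer! methods removed above unnecessary. A simplified sketch of the layered-source idea (a stand-in, not the gem's Manager):

```ruby
# Sketch of layered configuration: a source applied at a higher level wins,
# and removing it would expose the values beneath. This mirrors the idea
# behind apply_config(ServerSource.new(config_data), 1) without reproducing
# the gem's actual Manager.
class MiniLayeredConfig
  def initialize(defaults = {})
    @layers = [defaults]          # index 0 = lowest priority
  end

  def apply(source, level)
    @layers.insert(level, source)
  end

  def [](key)
    layer = @layers.compact.reverse.find { |l| l.key?(key) }
    layer && layer[key]
  end
end

config = MiniLayeredConfig.new('transaction_tracer.enabled' => true)
config.apply({ 'transaction_tracer.enabled' => false }, 1)   # "server source"
config['transaction_tracer.enabled']   # => false, the server layer wins
```
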
-
+
# Logs when we connect to the server, for debugging purposes
# - makes sure we know if an agent has not connected
def log_connection!(config_data)
- control.log! "Connected to NewRelic Service at #{@service.collector.name}"
+ log.info "Connected to NewRelic Service at #{@service.collector.name}"
log.debug "Agent Run = #{@service.agent_id}."
log.debug "Connection data = #{config_data.inspect}"
+ if config_data['messages'] && config_data['messages'].any?
+ log_collector_messages(config_data['messages'])
+ end
end
+
+ def log_collector_messages(messages)
+ messages.each do |message|
+ log.send(message['level'].downcase.to_sym, message['message'])
+ end
+ end
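
log_collector_messages relays server-supplied messages into the agent log at the level the collector names. An illustrative payload matching the keys the method reads ('level', 'message'); the values are made up:

```ruby
# Illustrative collector reply fragment, matching the keys read by
# log_collector_messages above; real messages come from the service.
messages = [
  { 'level' => 'INFO', 'message' => 'Agent connected.' },
  { 'level' => 'WARN', 'message' => 'High security mode is enabled.' }
]

messages.each do |message|
  # downcase + to_sym turns 'WARN' into :warn, i.e. log.warn(...)
  puts "#{message['level'].downcase.to_sym} => #{message['message']}"
end
```
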
end
include Connect
# Serialize all the important data that the agent might want
@@ -1003,15 +904,20 @@
def harvest_and_send_timeslice_data
now = Time.now
NewRelic::Agent.instance.stats_engine.get_stats_no_scope('Supportability/invoke_remote').record_data_point(0.0)
NewRelic::Agent.instance.stats_engine.get_stats_no_scope('Supportability/invoke_remote/metric_data').record_data_point(0.0)
harvest_timeslice_data(now)
- # In this version of the protocol, we get back an assoc array of spec to id.
- metric_specs_and_ids = @service.metric_data(@last_harvest_time.to_f,
- now.to_f,
- @unsent_timeslice_data.values)
- metric_specs_and_ids ||= []
+ # In this version of the protocol
+ # we get back an assoc array of spec to id.
+ metric_specs_and_ids = []
+ begin
+ metric_specs_and_ids = @service.metric_data(@last_harvest_time.to_f,
+ now.to_f,
+ @unsent_timeslice_data.values)
+ rescue UnrecoverableServerException => e
+ log.debug e.message
+ end
fill_metric_id_cache(metric_specs_and_ids)
log.debug "#{now}: sent #{@unsent_timeslice_data.length} timeslices (#{@service.agent_id}) in #{Time.now - now} seconds"
# if we successfully invoked this web service, then clear the unsent message cache.
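
metric_data returns (spec, id) pairs that fill_metric_id_cache stores so later harvests can reference metrics by integer id; that method is not part of this diff, so the following is only an assumed model of the cache it fills:

```ruby
# Assumed model of the metric id cache: the collector answers a metric_data
# post with (spec, id) pairs, and later harvests can send the small integer
# id instead of the full name/scope pair.
MetricSpec = Struct.new(:name, :scope)

metric_ids = {}
specs_and_ids = [
  [{ 'name' => 'Controller/example/index', 'scope' => '' }, 101],
  [{ 'name' => 'ActiveRecord/find', 'scope' => 'Controller/example/index' }, 102]
]

specs_and_ids.each do |spec_hash, id|
  spec = MetricSpec.new(spec_hash['name'], spec_hash['scope'])
  metric_ids[spec] = id
end

metric_ids.size   # => 2
```
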
@@ -1021,23 +927,26 @@
# Fills the traces array with the harvested transactions from
# the transaction sampler, subject to the setting for slowest
# transaction threshold
def harvest_transaction_traces
- @traces = @transaction_sampler.harvest(@traces, @slowest_transaction_threshold)
+ @traces = @transaction_sampler.harvest(@traces)
@traces
end
def harvest_and_send_slowest_sql
# FIXME add the code to try to resend if our connection is down
sql_traces = @sql_sampler.harvest
unless sql_traces.empty?
log.debug "Sending (#{sql_traces.size}) sql traces"
begin
@service.sql_trace_data(sql_traces)
- rescue
- @sql_sampler.merge sql_traces
+ rescue UnrecoverableServerException => e
+ log.debug e.message
+ rescue => e
+ log.debug "Remerging SQL traces after #{e.class.name}: #{e.message}"
+ @sql_sampler.merge sql_traces
end
end
end
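
The SQL harvest now distinguishes unrecoverable server rejections (logged, payload dropped) from transient failures (payload re-merged for the next harvest). The triage pattern in isolation, with a placeholder exception standing in for the agent's UnrecoverableServerException:

```ruby
# Triaging send failures: permanent rejections are logged and the payload
# dropped; transient failures put the payload back so the next harvest can
# retry. The exception class here is a placeholder, not the gem's.
class FakeUnrecoverableServerException < StandardError; end

def send_with_triage(payload, buffer)
  yield payload
rescue FakeUnrecoverableServerException => e
  puts "dropping payload: #{e.message}"        # the server will never accept it
rescue => e
  puts "re-merging payload after #{e.class}"   # try again on the next harvest
  buffer.concat(payload)
end
```
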
# This handles getting the transaction traces and then sending
@@ -1049,26 +958,25 @@
def harvest_and_send_slowest_sample
harvest_transaction_traces
unless @traces.empty?
now = Time.now
log.debug "Sending (#{@traces.length}) transaction traces"
-
+
begin
options = { :keep_backtraces => true }
- options[:record_sql] = @record_sql unless @record_sql == :off
- if @transaction_sampler.explain_enabled
- options[:explain_sql] = @transaction_sampler.explain_threshold
+ if !(NewRelic::Agent::Database.record_sql_method == :off)
+ options[:record_sql] = NewRelic::Agent::Database.record_sql_method
end
+ if Agent.config[:'transaction_tracer.explain_enabled']
+ options[:explain_sql] = Agent.config[:'transaction_tracer.explain_threshold']
+ end
traces = @traces.collect {|trace| trace.prepare_to_send(options)}
@service.transaction_sample_data(traces)
- rescue PostTooBigException
- # we tried to send too much data, drop the first trace and
- # try again
- retry if @traces.shift
+ log.debug "Sent slowest sample (#{@service.agent_id}) in #{Time.now - now} seconds"
+ rescue UnrecoverableServerException => e
+ log.debug e.message
end
-
- log.debug "Sent slowest sample (#{@service.agent_id}) in #{Time.now - now} seconds"
end
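
The trace-sending options are now assembled straight from NewRelic::Agent::Database.record_sql_method and the transaction_tracer.explain_* settings. A sketch of that assembly with illustrative config values:

```ruby
# Sketch of how the prepare_to_send options are assembled from settings.
# The keys mirror the ones built above; the config values are illustrative.
record_sql_method = :obfuscated   # e.g. NewRelic::Agent::Database.record_sql_method
explain_enabled   = true          # transaction_tracer.explain_enabled
explain_threshold = 0.5           # transaction_tracer.explain_threshold, seconds

options = { :keep_backtraces => true }
options[:record_sql]  = record_sql_method unless record_sql_method == :off
options[:explain_sql] = explain_threshold if explain_enabled

options
# => {:keep_backtraces=>true, :record_sql=>:obfuscated, :explain_sql=>0.5}
```
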
# if we succeed sending this sample, then we don't need to keep
# the slowest sample around - it has been sent already and we
# can clear the collection and move on
@@ -1091,32 +999,34 @@
harvest_errors
if @unsent_errors && @unsent_errors.length > 0
log.debug "Sending #{@unsent_errors.length} errors"
begin
@service.error_data(@unsent_errors)
- rescue PostTooBigException
- @unsent_errors.shift
- retry
+ rescue UnrecoverableServerException => e
+ log.debug e.message
end
# if the remote invocation fails, then we never clear
# @unsent_errors, and therefore we can re-attempt to send on
# the next heartbeat. Note the error collector maxes out at
# 20 instances to prevent leakage
@unsent_errors = []
end
end
-
+
def transmit_data
log.debug "Sending data to New Relic Service"
harvest_and_send_errors
harvest_and_send_slowest_sample
harvest_and_send_slowest_sql
harvest_and_send_timeslice_data
rescue => e
retry_count ||= 0
retry_count += 1
- retry unless retry_count > 1
+ if retry_count <= 1
+ log.debug "retrying transmit_data after #{e}"
+ retry
+ end
raise e
ensure
NewRelic::Agent::Database.close_connections unless forked?
end
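
transmit_data retries exactly once, logging the exception before the retry. The retry-once idiom in isolation, showing that the counter initialized inside the rescue survives the retry:

```ruby
# The retry-once idiom used by transmit_data, in isolation: the local counter
# initialized inside the rescue (retry_count ||= 0) survives the retry, so
# the begin block runs at most twice before the error propagates.
def fragile_operation(attempts)
  attempts << Time.now
  raise "boom" if attempts.size < 2
  "ok"
end

attempts = []
begin
  fragile_operation(attempts)
rescue => e
  retry_count ||= 0
  retry_count += 1
  retry if retry_count <= 1
  raise e
end
# => "ok" on the second attempt; a second failure would re-raise
```
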
@@ -1137,17 +1047,18 @@
@service.shutdown(Time.now.to_f)
else
log.debug "This agent connected from parent process #{@connected_pid}--not sending shutdown"
end
log.debug "Graceful disconnect complete"
- rescue Timeout::Error, StandardError
+ rescue Timeout::Error, StandardError => e
+ log.debug "Error when disconnecting #{e.class.name}: #{e.message}"
end
else
log.debug "Bypassing graceful disconnect - agent not connected"
end
end
end
-
+
extend ClassMethods
include InstanceMethods
include BrowserMonitoring
end
end