agent.rb in newrelic_rpm-2.13.5.beta1

- old
+ new

@@ -35,10 +35,11 @@
         @histogram = NewRelic::Histogram.new(NewRelic::Control.instance.apdex_t / 10)
         @stats_engine = NewRelic::Agent::StatsEngine.new
         @transaction_sampler = NewRelic::Agent::TransactionSampler.new
         @stats_engine.transaction_sampler = @transaction_sampler
         @error_collector = NewRelic::Agent::ErrorCollector.new
+        @connect_attempts = 0
 
         @request_timeout = NewRelic::Control.instance.fetch('timeout', 2 * 60)
 
         @last_harvest_time = Time.now
         @obfuscator = method(:default_sql_obfuscator)
@@ -92,15 +93,10 @@
             error_collector.notice_error e, :uri => options['uri'], :metric => metric
           end
           # busy time ?
         end
 
-        # This method is deprecated.  Use NewRelic::Agent.manual_start
-        def manual_start(ignored=nil, also_ignored=nil)
-          raise "This method no longer supported.  Instead use the class method NewRelic::Agent.manual_start"
-        end
-
         # This method should be called in a forked process after a fork.
         # It assumes the parent process initialized the agent, but does
         # not assume the agent started.
         #
         # The call is idempotent, but not re-entrant.
@@ -224,86 +220,164 @@
         end
 
         def log
           NewRelic::Agent.logger
         end
+        
+        # Herein lies the corpse of the former 'start' method. May
+        # it's unmatched flog score rest in pieces.
+        module Start
+          def already_started?
+            if started?
+              control.log!("Agent Started Already!", :error)
+              true
+            end
+          end
 
-        # Start up the agent.  This verifies that the agent_enabled? is
-        # true and initializes the sampler based on the current
-        # configuration settings.  Then it will fire up the background
-        # thread for sending data to the server if applicable.
-        def start
-          if started?
-            control.log! "Agent Started Already!", :error
-            return
+          def disabled?
+            !control.agent_enabled?
           end
-          return if !control.agent_enabled?
-          @started = true
-          @local_host = determine_host
+          
+          def log_dispatcher
+            dispatcher_name = control.dispatcher.to_s
+            return if log_if(dispatcher_name.empty?, :info, "No dispatcher detected.")
+            log.info "Dispatcher: #{dispatcher_name}"
+          end
+          
+          def log_app_names
+            log.info "Application: #{control.app_names.join(", ")}"
+          end
 
-          if control.dispatcher.nil? || control.dispatcher.to_s.empty?
-            log.info "No dispatcher detected."
-          else
-            log.info "Dispatcher: #{control.dispatcher.to_s}"
+          def apdex_f
+            (4 * NewRelic::Control.instance.apdex_t).to_f
           end
-          log.info "Application: #{control.app_names.join(", ")}" unless control.app_names.empty?
 
-          sampler_config = control.fetch('transaction_tracer', {})
-          # TODO: Should move this state into the transaction sampler instance
-          @should_send_samples = @config_should_send_samples = sampler_config.fetch('enabled', true)
-          @should_send_random_samples = sampler_config.fetch('random_sample', false)
-          @explain_threshold = sampler_config.fetch('explain_threshold', 0.5).to_f
-          @explain_enabled = sampler_config.fetch('explain_enabled', true)
-          @record_sql = sampler_config.fetch('record_sql', :obfuscated).to_sym
+          def apdex_f_threshold?
+            sampler_config.fetch('transaction_threshold', '') =~ /apdex_f/i            
+          end
 
-          # use transaction_threshold: 4.0 to force the TT collection
-          # threshold to 4 seconds
-          # use transaction_threshold: apdex_f to use your apdex t value
-          # multiplied by 4
-          # undefined transaction_threshold defaults to 2.0
-          apdex_f = 4 * NewRelic::Control.instance.apdex_t
-          @slowest_transaction_threshold = sampler_config.fetch('transaction_threshold', 2.0)
-          if @slowest_transaction_threshold =~ /apdex_f/i
-            @slowest_transaction_threshold = apdex_f
+          def set_sql_recording!
+            @record_sql = sampler_config.fetch('record_sql', :obfuscated).to_sym
+            log_sql_transmission_warning?
           end
-          @slowest_transaction_threshold = @slowest_transaction_threshold.to_f
 
-          log.warn "Agent is configured to send raw SQL to RPM service" if @record_sql == :raw
+          def log_sql_transmission_warning?
+            log_if((@record_sql == :raw), :warn, "Agent is configured to send raw SQL to RPM service")
+          end
 
-          case
-          when !control.monitor_mode?
-            log.warn "Agent configured not to send data in this environment - edit newrelic.yml to change this"
-          when !control.license_key
-            log.error "No license key found.  Please edit your newrelic.yml file and insert your license key."
-          when control.license_key.length != 40
-            log.error "Invalid license key: #{control.license_key}"
-          when [:passenger, :unicorn].include?(control.dispatcher)
-            log.info "Connecting workers after forking."
-          else
-            # Do the connect in the foreground if we are in sync mode
-            NewRelic::Agent.disable_all_tracing { connect(:keep_retrying => false) } if control.sync_startup
+          def sampler_config
+            control.fetch('transaction_tracer', {})
+          end
+          
+          # this entire method should be done on the transaction
+          # sampler object, rather than here. We should pass in the
+          # sampler config.
+          def config_transaction_tracer
+            @should_send_samples = @config_should_send_samples = sampler_config.fetch('enabled', true)
+            @should_send_random_samples = sampler_config.fetch('random_sample', false)
+            @explain_threshold = sampler_config.fetch('explain_threshold', 0.5).to_f
+            @explain_enabled = sampler_config.fetch('explain_enabled', true)
+            set_sql_recording!
+            
+            # default to 2.0, string 'apdex_f' will turn into your
+            # apdex * 4
+            @slowest_transaction_threshold = sampler_config.fetch('transaction_threshold', 2.0).to_f
+            @slowest_transaction_threshold = apdex_f if apdex_f_threshold?
+          end
 
-            # Start the event loop and initiate connection if necessary
-            start_worker_thread
+          def connect_in_foreground
+            NewRelic::Agent.disable_all_tracing { connect(:keep_retrying => false) }
+          end
 
-            # Our shutdown handler needs to run after other shutdown handlers
-            # that may be doing things like running the app (hello sinatra).
-            if control.send_data_on_exit
-              if RUBY_VERSION =~ /rubinius/i
-                list = at_exit { shutdown }
-                # move the shutdown handler to the front of the list, to
-                # execute last:
-                list.unshift(list.pop)
-              elsif !defined?(JRuby) or !defined?(Sinatra::Application)
-                at_exit { at_exit { shutdown } }
-              end
+          def using_rubinius?
+            RUBY_VERSION =~ /rubinius/i            
+          end
+          
+          def using_jruby?
+            defined?(JRuby) 
+          end
+          
+          def using_sinatra?
+            defined?(Sinatra::Application)
+          end
+          
+          # we should not set an at_exit block if people are using
+          # these as they don't do standard at_exit behavior per MRI/YARV
+          def weird_ruby?
+            using_rubinius? || using_jruby? || using_sinatra?
+          end
+          
+          def install_exit_handler
+            if control.send_data_on_exit && !weird_ruby?
+              # Our shutdown handler needs to run after other shutdown handlers
+              at_exit { at_exit { shutdown } }
             end
           end
-          log.info "New Relic RPM Agent #{NewRelic::VERSION::STRING} Initialized: pid = #$$"
-          log.info "Agent Log found in #{NewRelic::Control.instance.log_file}" if NewRelic::Control.instance.log_file
+
+          def notify_log_file_location
+            log_file = NewRelic::Control.instance.log_file
+            log_if(log_file, :info, "Agent Log found in #{log_file}")
+          end
+
+          def log_version_and_pid
+            log.info "New Relic RPM Agent #{NewRelic::VERSION::STRING} Initialized: pid = #{$$}"
+          end
+          
+          def log_if(boolean, level, message)
+            self.log.send(level, message) if boolean
+            boolean
+          end
+
+          def log_unless(boolean, level, message)
+            self.log.send(level, message) unless boolean
+            boolean
+          end
+          
+          def monitoring?
+            log_unless(control.monitor_mode?, :warn, "Agent configured not to send data in this environment - edit newrelic.yml to change this")
+          end
+
+          def has_license_key?
+            log_unless(control.license_key, :error, "No license key found.  Please edit your newrelic.yml file and insert your license key.")
+          end
+
+          def has_correct_license_key?
+            has_license_key? && correct_license_length
+          end
+          
+          def correct_license_length
+            key = control.license_key
+            log_unless((key.length == 40), :error, "Invalid license key: #{key}")
+          end
+
+          def using_forking_dispatcher?
+            log_if([:passenger, :unicorn].include?(control.dispatcher), :info, "Connecting workers after forking.")
+          end
+            
+          def check_config_and_start_agent
+            return unless monitoring? && has_correct_license_key?
+            return if using_forking_dispatcher?
+            connect_in_foreground if control.sync_startup
+            start_worker_thread
+            install_exit_handler
+          end
         end
 
+        include Start
+
+        def start
+          return if already_started? || disabled?
+          @started = true
+          @local_host = determine_host
+          log_dispatcher
+          log_app_names
+          config_transaction_tracer
+          check_config_and_start_agent
+          log_version_and_pid
+          notify_log_file_location
+        end
+
         # Clear out the metric data, errors, and transaction traces.  Reset the histogram data.
         def reset_stats
           @stats_engine.reset_stats
           @unsent_errors = []
           @traces = nil
@@ -316,181 +390,296 @@
         private
         def collector
           @collector ||= control.server
         end
 
-        # Try to launch the worker thread and connect to the server.
-        #
-        # See #connect for a description of connection_options.
-        def start_worker_thread(connection_options = {})
-          log.debug "Creating RPM worker thread."
-          @worker_thread = Thread.new do
-            begin
+        module StartWorkerThread
+
+          def check_transaction_sampler_status
+            # disable transaction sampling if disabled by the server
+            # and we're not in dev mode
+            if control.developer_mode? || @should_send_samples
+              @transaction_sampler.enable
+            else
+              @transaction_sampler.disable
+            end
+          end
+
+          def log_worker_loop_start
+            log.info "Reporting performance data every #{@report_period} seconds."
+            log.debug "Running worker loop"
+          end
+
+          def create_and_run_worker_loop
+            @worker_loop = WorkerLoop.new
+            @worker_loop.run(@report_period) do
+              harvest_and_send_timeslice_data
+              harvest_and_send_slowest_sample if @should_send_samples
+              harvest_and_send_errors if error_collector.enabled
+            end
+          end
+
+          def handle_force_restart(error)
+            log.info error.message
+            # disconnect and start over.
+            # clear the stats engine
+            reset_stats
+            @metric_ids = {}
+            @connected = nil
+            # Wait a short time before trying to reconnect
+            sleep 30
+          end
+
+          def handle_force_disconnect(error)
+            # when a disconnect is requested, stop the current thread, which
+            # is the worker thread that gathers data and talks to the
+            # server.
+            log.error "RPM forced this agent to disconnect (#{error.message})"
+            disconnect
+          end
+
+          def handle_server_connection_problem(error)
+            log.error "Unable to establish connection with the server.  Run with log level set to debug for more information."
+            log.debug("#{error.class.name}: #{error.message}\n#{error.backtrace.first}")
+            disconnect
+          end
+
+          def handle_other_error(error)
+            log.error "Terminating worker loop: #{error.class.name}: #{error.message}\n  #{error.backtrace.join("\n  ")}"
+            disconnect
+          end
+
+          def catch_errors
+            yield
+          rescue NewRelic::Agent::ForceRestartException => e
+            handle_force_restart(e)
+            retry
+          rescue NewRelic::Agent::ForceDisconnectException => e
+            handle_force_disconnect(e)
+          rescue NewRelic::Agent::ServerConnectionException => e
+            handle_server_connection_problem(e)
+          rescue Exception => e
+            handle_other_error(e)
+          end
+          
+          def deferred_work!(connection_options)
+            catch_errors do
               NewRelic::Agent.disable_all_tracing do
                 # We try to connect.  If this returns false that means
                 # the server rejected us for a licensing reason and we should
                 # just exit the thread.  If it returns nil
                 # that means it didn't try to connect because we're in the master.
                 connect(connection_options)
                 if @connected
-                  # disable transaction sampling if disabled by the server and we're not in dev mode
-                  if !control.developer_mode? && !@should_send_samples
-                    @transaction_sampler.disable
-                  else
-                    @transaction_sampler.enable # otherwise ensure TT's are enabled
-                  end
-
-                  log.info "Reporting performance data every #{@report_period} seconds."
-                  log.debug "Running worker loop"
-                  # Note if the agent attempts to report more frequently than allowed by the server
-                  # the server will start dropping data.
-                  @worker_loop = WorkerLoop.new
-                  @worker_loop.run(@report_period) do
-                    harvest_and_send_timeslice_data
-                    harvest_and_send_slowest_sample if @should_send_samples
-                    harvest_and_send_errors if error_collector.enabled
-                  end
+                  check_transaction_sampler_status
+                  log_worker_loop_start
+                  create_and_run_worker_loop
                 else
-                  log.debug "No connection.  Worker thread finished."
+                  log.debug "No connection.  Worker thread ending."
                 end
               end
-            rescue NewRelic::Agent::ForceRestartException => e
-              log.info e.message
-              # disconnect and start over.
-              # clear the stats engine
-              reset_stats
-              @metric_ids = {}
-              @connected = nil
-              # Wait a short time before trying to reconnect
-              sleep 30
-              retry
-            rescue NewRelic::Agent::ForceDisconnectException => e
-              # when a disconnect is requested, stop the current thread, which
-              # is the worker thread that gathers data and talks to the
-              # server.
-              log.error "RPM forced this agent to disconnect (#{e.message})"
-              @connected = false
-            rescue NewRelic::Agent::ServerConnectionException => e
-              log.error "Unable to establish connection with the server.  Run with log level set to debug for more information."
-              log.debug("#{e.class.name}: #{e.message}\n#{e.backtrace.first}")
-              @connected = false
-            rescue Exception => e
-              log.error "Terminating worker loop: #{e.class.name}: #{e}\n  #{e.backtrace.join("\n  ")}"
-              @connected = false
-            end # begin
+            end
+          end
+        end
+        include StartWorkerThread
+        
+        # Try to launch the worker thread and connect to the server.
+        #
+        # See #connect for a description of connection_options.
+        def start_worker_thread(connection_options = {})
+          log.debug "Creating RPM worker thread."
+          @worker_thread = Thread.new do
+            deferred_work!(connection_options)
           end # thread new
           @worker_thread['newrelic_label'] = 'Worker Loop'
         end
 
         def control
           NewRelic::Control.instance
         end
+        
+        module Connect
+          attr_accessor :connect_retry_period
+          attr_accessor :connect_attempts
 
-        # Connect to the server and validate the license.  If successful,
-        # @connected has true when finished.  If not successful, you can
-        # keep calling this.  Return false if we could not establish a
-        # connection with the server and we should not retry, such as if
-        # there's a bad license key.
-        #
-        # Set keep_retrying=false to disable retrying and return asap, such as when
-        # invoked in the foreground.  Otherwise this runs until a successful
-        # connection is made, or the server rejects us.
-        #
-        # * <tt>:keep_retrying => false</tt> to only try to connect once, and
-        #   return with the connection set to nil.  This ensures we may try again
-        #   later (default true).
-        # * <tt>force_reconnect => true</tt> if you want to establish a new connection
-        #   to the server before running the worker loop.  This means you get a separate
-        #   agent run and RPM sees it as a separate instance (default is false).
-        def connect(options)
-          # Don't proceed if we already connected (@connected=true) or if we tried
-          # to connect and were rejected with prejudice because of a license issue
-          # (@connected=false).
-          return if !@connected.nil? && !options[:force_reconnect]
-          keep_retrying = options[:keep_retrying].nil? || options[:keep_retrying]
+          def disconnect
+            @connected = false
+            true
+          end
 
-          # wait a few seconds for the web server to boot, necessary in development
-          connect_retry_period = keep_retrying ? 10 : 0
-          connect_attempts = 0
-          @agent_id = nil
-          begin
-            sleep connect_retry_period.to_i
-            log.debug "Connecting Process to RPM: #$0"
-            host = invoke_remote(:get_redirect_host)
-            @collector = control.server_from_host(host) if host
-            environment = control['send_environment_info'] != false ? control.local_env.snapshot : []
-            log.debug "Connecting with validation seed/token: #{control.validate_seed}/#{control.validate_token}" if control.validate_seed
-            connect_data = invoke_remote :connect,
+          def tried_to_connect?(options)
+            !(@connected.nil? || options[:force_reconnect])
+          end
+
+          def should_keep_retrying?(options)
+            @keep_retrying = (options[:keep_retrying].nil? || options[:keep_retrying])
+          end
+
+          def get_retry_period
+            return 600 if self.connect_attempts > 6
+            connect_attempts * 60
+          end
+
+          def increment_retry_period!
+            self.connect_retry_period=(get_retry_period)
+          end
+
+          def should_retry?
+            if @keep_retrying
+              self.connect_attempts=(connect_attempts + 1)
+              increment_retry_period!
+              log.info "Will re-attempt in #{connect_retry_period} seconds"
+              true
+            else
+              disconnect
+              false
+            end
+          end
+
+          def log_error(error)
+            log.error "Error establishing connection with New Relic RPM Service at #{control.server}: #{error.message}"
+            log.debug error.backtrace.join("\n")
+          end
+
+          def handle_license_error(error)
+            log.error error.message
+            log.info "Visit NewRelic.com to obtain a valid license key, or to upgrade your account."
+            disconnect
+          end
+          
+          def log_seed_token
+            if control.validate_seed
+              log.debug "Connecting with validation seed/token: #{control.validate_seed}/#{control.validate_token}"
+            end
+          end
+
+          def environment_for_connect
+            control['send_environment_info'] != false ? control.local_env.snapshot : []
+          end
+
+          def validate_settings
+            {
+              :seed => control.validate_seed,
+              :token => control.validate_token
+            }
+          end
+
+          def connect_settings
+            {
               :pid => $$,
               :host => @local_host,
               :app_name => control.app_names,
               :language => 'ruby',
               :agent_version => NewRelic::VERSION::STRING,
-              :environment => environment,
+              :environment => environment_for_connect,
               :settings => control.settings,
-              :validate => {:seed => control.validate_seed,
-                            :token => control.validate_token }
+              :validate => validate_settings
+            }
+          end
+          def connect_to_server
+            log_seed_token
+            connect_data = invoke_remote(:connect, connect_settings)
+          end
 
-            @agent_id = connect_data['agent_run_id']
-            @report_period = connect_data['data_report_period']
-            @url_rules = connect_data['url_rules']
+          def configure_error_collector!(server_enabled)
+            # Ask for permission to collect error data
+            enabled = if error_collector.config_enabled && server_enabled
+                        error_collector.enabled = true
+                      else
+                        error_collector.enabled = false
+                      end
+            log.debug "Errors will #{enabled ? '' : 'not '}be sent to the RPM service."
+          end
+          
+          def enable_random_samples!(sample_rate)
+            @transaction_sampler.random_sampling = true
+            @transaction_sampler.sampling_rate = sample_rate
+            log.info "Transaction sampling enabled, rate = #{@transaction_sampler.sampling_rate}"
+          end
+          
 
-            control.log! "Connected to NewRelic Service at #{@collector}"
-            log.debug "Agent Run       = #{@agent_id}."
-            log.debug "Connection data = #{connect_data.inspect}"
-
+          def configure_transaction_tracer!(server_enabled, sample_rate)
             # Ask the server for permission to send transaction samples.
             # determined by subscription license.
-            @should_send_samples = @config_should_send_samples && connect_data['collect_traces']
+            @should_send_samples = @config_should_send_samples && server_enabled
 
             if @should_send_samples
-              if @should_send_random_samples
-                @transaction_sampler.random_sampling = true
-                @transaction_sampler.sampling_rate = connect_data['sampling_rate']
-                log.info "Transaction sampling enabled, rate = #{@transaction_sampler.sampling_rate}"
-              end
+              # I don't think this is ever true, but...
+              enable_random_samples!(sample_rate) if @should_send_random_samples
               log.debug "Transaction tracing threshold is #{@slowest_transaction_threshold} seconds."
             else
               log.debug "Transaction traces will not be sent to the RPM service."
             end
+          end
 
-            # Ask for permission to collect error data
-            error_collector.enabled = error_collector.config_enabled && connect_data['collect_errors']
+          def set_collector_host!
+            host = invoke_remote(:get_redirect_host)
+            if host
+              @collector = control.server_from_host(host)
+            end
+          end
 
-            log.debug "Errors will be sent to the RPM service." if error_collector.enabled
+          def query_server_for_configuration
+            set_collector_host!
+            
+            finish_setup(connect_to_server)
+          end
+          def finish_setup(config_data)
+            @agent_id = config_data['agent_run_id']
+            @report_period = config_data['data_report_period']
+            @url_rules = config_data['url_rules']
 
-            @connected_pid = $$
-            @connected = true
+            log_connection!(config_data)
+            configure_transaction_tracer!(config_data['collect_traces'], config_data['sample_rate'])
+            configure_error_collector!(config_data['collect_errors'])
+          end
 
-          rescue NewRelic::Agent::LicenseException => e
-            log.error e.message
-            log.info "Visit NewRelic.com to obtain a valid license key, or to upgrade your account."
-            @connected = false
+          def log_connection!(config_data)
+            control.log! "Connected to NewRelic Service at #{@collector}"
+            log.debug "Agent Run       = #{@agent_id}."
+            log.debug "Connection data = #{config_data.inspect}"
+          end
+        end
+        include Connect
 
-          rescue Timeout::Error, StandardError => e
-            if e.instance_of? NewRelic::Agent::ServerConnectionException
-              log.info "Unable to establish connection with New Relic RPM Service at #{control.server}: #{e.message}"
-              log.debug e.backtrace.join("\n")
-            else
-              log.error "Error establishing connection with New Relic RPM Service at #{control.server}: #{e.message}"
-              log.debug e.backtrace.join("\n")
-            end
-            # retry logic
-            if keep_retrying
-              connect_attempts += 1
-              case connect_attempts
-              when 1..2
-                connect_retry_period, period_msg = 60, "1 minute"
-              when 3..5
-                connect_retry_period, period_msg = 60 * 2, "2 minutes"
-              else
-                connect_retry_period, period_msg = 5 * 60, "5 minutes"
-              end
-              log.info "Will re-attempt in #{period_msg}"
-              retry
-            else
-              @connected = nil
-            end
+        # Connect to the server and validate the license.  If successful,
+        # @connected has true when finished.  If not successful, you can
+        # keep calling this.  Return false if we could not establish a
+        # connection with the server and we should not retry, such as if
+        # there's a bad license key.
+        #
+        # Set keep_retrying=false to disable retrying and return asap, such as when
+        # invoked in the foreground.  Otherwise this runs until a successful
+        # connection is made, or the server rejects us.
+        #
+        # * <tt>:keep_retrying => false</tt> to only try to connect once, and
+        #   return with the connection set to nil.  This ensures we may try again
+        #   later (default true).
+        # * <tt>force_reconnect => true</tt> if you want to establish a new connection
+        #   to the server before running the worker loop.  This means you get a separate
+        #   agent run and RPM sees it as a separate instance (default is false).
+        def connect(options)
+          # Don't proceed if we already connected (@connected=true) or if we tried
+          # to connect and were rejected with prejudice because of a license issue
+          # (@connected=false), unless we're forced to by force_reconnect.
+          return if tried_to_connect?(options)
+
+          # wait a few seconds for the web server to boot, necessary in development
+          @connect_retry_period = should_keep_retrying?(options) ? 10 : 0
+
+          sleep connect_retry_period
+          log.debug "Connecting Process to RPM: #$0"
+          query_server_for_configuration
+          @connected_pid = $$
+          @connected = true
+        rescue NewRelic::Agent::LicenseException => e
+          handle_license_error(e)
+        rescue Timeout::Error, StandardError => e
+          log_error(e)
+          if should_retry?
+            retry
+          else
+            disconnect
           end
         end
 
         def determine_host
           Socket.gethostname