agent.rb in newrelic_rpm-3.5.6.42.beta

- old
+ new

@@ -22,15 +22,19 @@
 
       def initialize
         @launch_time = Time.now
 
         @metric_ids = {}
+        @events = NewRelic::Agent::EventListener.new
         @stats_engine = NewRelic::Agent::StatsEngine.new
         @transaction_sampler = NewRelic::Agent::TransactionSampler.new
         @sql_sampler = NewRelic::Agent::SqlSampler.new
         @thread_profiler = NewRelic::Agent::ThreadProfiler.new
+        @cross_process_monitor = NewRelic::Agent::CrossProcessMonitor.new(@events)
         @error_collector = NewRelic::Agent::ErrorCollector.new
+
+        @connect_state = :pending
         @connect_attempts = 0
 
         @last_harvest_time = Time.now
         @obfuscator = lambda {|sql| NewRelic::Agent::Database.default_sql_obfuscator(sql) }
         @forked = false
@@ -92,10 +96,14 @@
         # cross process id's and encoding
         attr_reader :cross_process_id
         attr_reader :cross_process_encoding_bytes
         # service for communicating with collector
         attr_accessor :service
+        # Global events dispatcher. This provides our primary mechanism
+        # for agent-wide events, such as finishing configuration, error notification
+        # and request before/after from Rack.
+        attr_reader :events
 
 
         # Returns the length of the unsent errors array, if it exists,
         # otherwise nil
         def unsent_errors_size
@@ -173,23 +181,25 @@
         #   quickly if there is some kind of latency with the server.
         def after_fork(options={})
           @forked = true
           Agent.config.apply_config(NewRelic::Agent::Configuration::ManualSource.new(options), 1)
 
-          # @connected gets false after we fail to connect or have an error
-          # connecting.  @connected has nil if we haven't finished trying to connect.
-          # or we didn't attempt a connection because this is the master process
-
           if channel_id = options[:report_to_channel]
             @service = NewRelic::Agent::PipeService.new(channel_id)
-            @connected_pid = $$
-            @metric_ids = {}
+            if connected?
+              @connected_pid = $$
+              @metric_ids = {}
+            else
+              ::NewRelic::Agent.logger.debug("Child process #{$$} not reporting to non-connected parent.")
+              @service.shutdown(Time.now)
+              disconnect
+            end
           end
 
           return if !Agent.config[:agent_enabled] ||
             !Agent.config[:monitor_mode] ||
-            @connected == false ||
+            disconnected? ||
             @worker_thread && @worker_thread.alive?
 
           ::NewRelic::Agent.logger.debug "Starting the worker thread in #{$$} after forking."
 
           # Clear out stats that are left over from parent process
@@ -208,16 +218,10 @@
         # True if we have initialized and completed 'start'
         def started?
           @started
         end
 
-        # Return nil if not yet connected, true if successfully started
-        # and false if we failed to start.
-        def connected?
-          @connected
-        end
-
         # Attempt a graceful shutdown of the agent, running the worker
         # loop if it exists and is running.
         #
         # Options:
         # :force_send => (true/false) # force the agent to send data
@@ -516,11 +520,11 @@
           # waits a while to reconnect.
           def handle_force_restart(error)
             ::NewRelic::Agent.logger.debug error.message
             reset_stats
             @metric_ids = {}
-            @connected = nil
+            @connect_state = :pending
             sleep 30
           end
 
           # when a disconnect is requested, stop the current thread, which
           # is the worker thread that gathers data and talks to the
@@ -575,11 +579,11 @@
                 # We try to connect.  If this returns false that means
                 # the server rejected us for a licensing reason and we should
                 # just exit the thread.  If it returns nil
                 # that means it didn't try to connect because we're in the master.
                 connect(connection_options)
-                if @connected
+                if connected?
                   log_worker_loop_start
                   create_and_run_worker_loop
                   # never reaches here unless there is a problem or
                   # the agent is exiting
                 else
@@ -608,66 +612,47 @@
 
         # This module is an artifact of a refactoring of the connect
         # method - all of its methods are used in that context, so it
         # can be refactored at will. It should be fully tested
         module Connect
-          # the frequency with which we should try to connect to the
-          # server at the moment.
-          attr_accessor :connect_retry_period
           # number of attempts we've made to contact the server
           attr_accessor :connect_attempts
 
           # Disconnect just sets connected to false, which prevents
           # the agent from trying to connect again
           def disconnect
-            @connected = false
+            @connect_state = :disconnected
             true
           end
 
-          # We've tried to connect if @connected is not nil, or if we
-          # are forcing reconnection (i.e. in the case of an
-          # after_fork with long running processes)
-          def tried_to_connect?(options)
-            !(@connected.nil? || options[:force_reconnect])
+          def connected?
+            @connect_state == :connected
           end
 
-          # We keep trying by default, but you can disable it with the
-          # :keep_retrying option set to false
-          def should_keep_retrying?(options)
-            @keep_retrying = (options[:keep_retrying].nil? || options[:keep_retrying])
+          def disconnected?
+            @connect_state == :disconnected
           end
 
+          # Don't connect if we're already connected, or if we tried to connect
+          # and were rejected with prejudice because of a license issue, unless
+          # we're forced to by force_reconnect.
+          def should_connect?(force=false)
+            force || (!connected? && !disconnected?)
+          end
+
           # Retry period is a minute for each failed attempt that
           # we've made. This should probably do some sort of sane TCP
           # backoff to prevent hammering the server, but a minute for
           # each attempt seems to work reasonably well.
-          def get_retry_period
-            return 600 if self.connect_attempts > 6
-            connect_attempts * 60
+          def connect_retry_period
+            [600, connect_attempts * 60].min
           end
 
-          def increment_retry_period! #:nodoc:
-            self.connect_retry_period=(get_retry_period)
+          def note_connect_failure
+            self.connect_attempts += 1
           end
 
-          # We should only retry when there has not been a more
-          # serious condition that would prevent it. We increment the
-          # connect attempts and the retry period, to prevent constant
-          # connection attempts, and tell the user what we're doing by
-          # logging.
-          def should_retry?
-            if @keep_retrying
-              self.connect_attempts=(connect_attempts + 1)
-              increment_retry_period!
-              ::NewRelic::Agent.logger.warn "Will re-attempt in #{connect_retry_period} seconds"
-              true
-            else
-              disconnect
-              false
-            end
-          end
-
           # When we have a problem connecting to the server, we need
           # to tell the user what happened, since this is not an error
           # we can handle gracefully.
           def log_error(error)
             ::NewRelic::Agent.logger.error "Error establishing connection with New Relic Service at #{control.server}:", error
@@ -742,28 +727,16 @@
             ::NewRelic::Agent.logger.debug "Server provided config: #{config_data.inspect}"
             server_config = NewRelic::Agent::Configuration::ServerSource.new(config_data)
             Agent.config.apply_config(server_config, 1)
             log_connection!(config_data) if @service
 
-            @cross_process_id = Agent.config[:cross_process_id]
-            @cross_process_encoding_key = Agent.config[:encoding_key]
-            @cross_process_encoding_bytes = get_bytes(@cross_process_encoding_key) unless @cross_process_encoding_key.nil?
+            # If you're adding something else here to respond to the server-side config,
+            # use an Agent.instance.events.subscribe(:finished_configuring) callback instead!
 
             @beacon_configuration = BeaconConfiguration.new
           end
 
-          # Ruby 1.8.6 doesn't support the bytes method on strings.
-          def get_bytes(value)
-            return [] if value.nil?
-
-            bytes = []
-            value.each_byte do |b|
-              bytes << b
-            end
-            bytes
-          end
-
           # Logs when we connect to the server, for debugging purposes
           # - makes sure we know if an agent has not connected
           def log_connection!(config_data)
             ::NewRelic::Agent.logger.debug "Connected to NewRelic Service at #{@service.collector.name}"
             ::NewRelic::Agent.logger.debug "Agent Run       = #{@service.agent_id}."
@@ -823,11 +796,11 @@
         end
 
         public :merge_data_from
 
         # Connect to the server and validate the license.  If successful,
-        # @connected has true when finished.  If not successful, you can
+        # connected? returns true when finished.  If not successful, you can
         # keep calling this.  Return false if we could not establish a
         # connection with the server and we should not retry, such as if
         # there's a bad license key.
         #
         # Set keep_retrying=false to disable retrying and return asap, such as when
@@ -838,29 +811,31 @@
         #   return with the connection set to nil.  This ensures we may try again
         #   later (default true).
         # * <tt>force_reconnect => true</tt> if you want to establish a new connection
         #   to the server before running the worker loop.  This means you get a separate
         #   agent run and New Relic sees it as a separate instance (default is false).
-        def connect(options)
-          # Don't proceed if we already connected (@connected=true) or if we tried
-          # to connect and were rejected with prejudice because of a license issue
-          # (@connected=false), unless we're forced to by force_reconnect.
-          return if tried_to_connect?(options)
+        def connect(options={})
+          defaults = {
+            :keep_retrying => true,
+            :force_reconnect => false
+          }
+          opts = defaults.merge(options)
 
-          # wait a few seconds for the web server to boot, necessary in development
-          @connect_retry_period = should_keep_retrying?(options) ? 10 : 0
+          return unless should_connect?(opts[:force_reconnect])
 
-          sleep connect_retry_period
           ::NewRelic::Agent.logger.debug "Connecting Process to New Relic: #$0"
           query_server_for_configuration
           @connected_pid = $$
-          @connected = true
+          @connect_state = :connected
         rescue NewRelic::Agent::LicenseException => e
           handle_license_error(e)
         rescue Timeout::Error, StandardError => e
           log_error(e)
-          if should_retry?
+          if opts[:keep_retrying]
+            note_connect_failure
+            ::NewRelic::Agent.logger.warn "Will re-attempt in #{connect_retry_period} seconds"
+            sleep connect_retry_period
             retry
           else
             disconnect
           end
         end
@@ -874,17 +849,10 @@
         # directory of this project
         def determine_home_directory
           control.root
         end
 
-        # Checks whether this process is a Passenger or Unicorn
-        # spawning server - if so, we probably don't intend to report
-        # statistics from this process
-        def is_application_spawner?
-          $0 =~ /ApplicationSpawner|^unicorn\S* master/
-        end
-
         # calls the busy harvester and collects timeslice data to
         # send later
         def harvest_timeslice_data(time=Time.now)
           # this creates timeslices that are harvested below
           NewRelic::Agent::BusyCalculator.harvest_busy
@@ -1069,10 +1037,10 @@
         # lifetime of the process
         #
         # If this process comes from a parent process, it will not
         # disconnect, so that the parent process can continue to send data
         def graceful_disconnect
-          if @connected
+          if connected?
             begin
               @service.request_timeout = 10
               transmit_data(true)
 
               if @connected_pid == $$ && !@service.kind_of?(NewRelic::Agent::NewRelicService)