lib/new_relic/agent/agent.rb in newrelic_rpm-3.5.5.540.dev vs lib/new_relic/agent/agent.rb in newrelic_rpm-3.5.6.42.beta

- old
+ new

@@ -22,15 +22,19 @@ def initialize @launch_time = Time.now @metric_ids = {} + @events = NewRelic::Agent::EventListener.new @stats_engine = NewRelic::Agent::StatsEngine.new @transaction_sampler = NewRelic::Agent::TransactionSampler.new @sql_sampler = NewRelic::Agent::SqlSampler.new @thread_profiler = NewRelic::Agent::ThreadProfiler.new + @cross_process_monitor = NewRelic::Agent::CrossProcessMonitor.new(@events) @error_collector = NewRelic::Agent::ErrorCollector.new + + @connect_state = :pending @connect_attempts = 0 @last_harvest_time = Time.now @obfuscator = lambda {|sql| NewRelic::Agent::Database.default_sql_obfuscator(sql) } @forked = false @@ -92,10 +96,14 @@ # cross process id's and encoding attr_reader :cross_process_id attr_reader :cross_process_encoding_bytes # service for communicating with collector attr_accessor :service + # Global events dispatcher. This will provides our primary mechanism + # for agent-wide events, such as finishing configuration, error notification + # and request before/after from Rack. + attr_reader :events # Returns the length of the unsent errors array, if it exists, # otherwise nil def unsent_errors_size @@ -173,23 +181,25 @@ # quickly if there is some kind of latency with the server. def after_fork(options={}) @forked = true Agent.config.apply_config(NewRelic::Agent::Configuration::ManualSource.new(options), 1) - # @connected gets false after we fail to connect or have an error - # connecting. @connected has nil if we haven't finished trying to connect. - # or we didn't attempt a connection because this is the master process - if channel_id = options[:report_to_channel] @service = NewRelic::Agent::PipeService.new(channel_id) - @connected_pid = $$ - @metric_ids = {} + if connected? + @connected_pid = $$ + @metric_ids = {} + else + ::NewRelic::Agent.logger.debug("Child process #{$$} not reporting to non-connected parent.") + @service.shutdown(Time.now) + disconnect + end end return if !Agent.config[:agent_enabled] || !Agent.config[:monitor_mode] || - @connected == false || + disconnected? || @worker_thread && @worker_thread.alive? ::NewRelic::Agent.logger.debug "Starting the worker thread in #{$$} after forking." # Clear out stats that are left over from parent process @@ -208,16 +218,10 @@ # True if we have initialized and completed 'start' def started? @started end - # Return nil if not yet connected, true if successfully started - # and false if we failed to start. - def connected? - @connected - end - # Attempt a graceful shutdown of the agent, running the worker # loop if it exists and is running. # # Options: # :force_send => (true/false) # force the agent to send data @@ -516,11 +520,11 @@ # waits a while to reconnect. def handle_force_restart(error) ::NewRelic::Agent.logger.debug error.message reset_stats @metric_ids = {} - @connected = nil + @connect_state = :pending sleep 30 end # when a disconnect is requested, stop the current thread, which # is the worker thread that gathers data and talks to the @@ -575,11 +579,11 @@ # We try to connect. If this returns false that means # the server rejected us for a licensing reason and we should # just exit the thread. If it returns nil # that means it didn't try to connect because we're in the master. connect(connection_options) - if @connected + if connected? log_worker_loop_start create_and_run_worker_loop # never reaches here unless there is a problem or # the agent is exiting else @@ -608,66 +612,47 @@ # This module is an artifact of a refactoring of the connect # method - all of its methods are used in that context, so it # can be refactored at will. It should be fully tested module Connect - # the frequency with which we should try to connect to the - # server at the moment. - attr_accessor :connect_retry_period # number of attempts we've made to contact the server attr_accessor :connect_attempts # Disconnect just sets connected to false, which prevents # the agent from trying to connect again def disconnect - @connected = false + @connect_state = :disconnected true end - # We've tried to connect if @connected is not nil, or if we - # are forcing reconnection (i.e. in the case of an - # after_fork with long running processes) - def tried_to_connect?(options) - !(@connected.nil? || options[:force_reconnect]) + def connected? + @connect_state == :connected end - # We keep trying by default, but you can disable it with the - # :keep_retrying option set to false - def should_keep_retrying?(options) - @keep_retrying = (options[:keep_retrying].nil? || options[:keep_retrying]) + def disconnected? + @connect_state == :disconnected end + # Don't connect if we're already connected, or if we tried to connect + # and were rejected with prejudice because of a license issue, unless + # we're forced to by force_reconnect. + def should_connect?(force=false) + force || (!connected? && !disconnected?) + end + # Retry period is a minute for each failed attempt that # we've made. This should probably do some sort of sane TCP # backoff to prevent hammering the server, but a minute for # each attempt seems to work reasonably well. - def get_retry_period - return 600 if self.connect_attempts > 6 - connect_attempts * 60 + def connect_retry_period + [600, connect_attempts * 60].min end - def increment_retry_period! #:nodoc: - self.connect_retry_period=(get_retry_period) + def note_connect_failure + self.connect_attempts += 1 end - # We should only retry when there has not been a more - # serious condition that would prevent it. We increment the - # connect attempts and the retry period, to prevent constant - # connection attempts, and tell the user what we're doing by - # logging. - def should_retry? - if @keep_retrying - self.connect_attempts=(connect_attempts + 1) - increment_retry_period! - ::NewRelic::Agent.logger.warn "Will re-attempt in #{connect_retry_period} seconds" - true - else - disconnect - false - end - end - # When we have a problem connecting to the server, we need # to tell the user what happened, since this is not an error # we can handle gracefully. def log_error(error) ::NewRelic::Agent.logger.error "Error establishing connection with New Relic Service at #{control.server}:", error @@ -742,28 +727,16 @@ ::NewRelic::Agent.logger.debug "Server provided config: #{config_data.inspect}" server_config = NewRelic::Agent::Configuration::ServerSource.new(config_data) Agent.config.apply_config(server_config, 1) log_connection!(config_data) if @service - @cross_process_id = Agent.config[:cross_process_id] - @cross_process_encoding_key = Agent.config[:encoding_key] - @cross_process_encoding_bytes = get_bytes(@cross_process_encoding_key) unless @cross_process_encoding_key.nil? + # If you're adding something else here to respond to the server-side config, + # use Agent.instance.events.subscribe(:finished_configuring) callback instead! @beacon_configuration = BeaconConfiguration.new end - # Ruby 1.8.6 doesn't support the bytes method on strings. - def get_bytes(value) - return [] if value.nil? - - bytes = [] - value.each_byte do |b| - bytes << b - end - bytes - end - # Logs when we connect to the server, for debugging purposes # - makes sure we know if an agent has not connected def log_connection!(config_data) ::NewRelic::Agent.logger.debug "Connected to NewRelic Service at #{@service.collector.name}" ::NewRelic::Agent.logger.debug "Agent Run = #{@service.agent_id}." @@ -823,11 +796,11 @@ end public :merge_data_from # Connect to the server and validate the license. If successful, - # @connected has true when finished. If not successful, you can + # connected? returns true when finished. If not successful, you can # keep calling this. Return false if we could not establish a # connection with the server and we should not retry, such as if # there's a bad license key. # # Set keep_retrying=false to disable retrying and return asap, such as when @@ -838,29 +811,31 @@ # return with the connection set to nil. This ensures we may try again # later (default true). # * <tt>force_reconnect => true</tt> if you want to establish a new connection # to the server before running the worker loop. This means you get a separate # agent run and New Relic sees it as a separate instance (default is false). - def connect(options) - # Don't proceed if we already connected (@connected=true) or if we tried - # to connect and were rejected with prejudice because of a license issue - # (@connected=false), unless we're forced to by force_reconnect. - return if tried_to_connect?(options) + def connect(options={}) + defaults = { + :keep_retrying => true, + :force_reconnect => false + } + opts = defaults.merge(options) - # wait a few seconds for the web server to boot, necessary in development - @connect_retry_period = should_keep_retrying?(options) ? 10 : 0 + return unless should_connect?(opts[:force_reconnect]) - sleep connect_retry_period ::NewRelic::Agent.logger.debug "Connecting Process to New Relic: #$0" query_server_for_configuration @connected_pid = $$ - @connected = true + @connect_state = :connected rescue NewRelic::Agent::LicenseException => e handle_license_error(e) rescue Timeout::Error, StandardError => e log_error(e) - if should_retry? + if opts[:keep_retrying] + note_connect_failure + ::NewRelic::Agent.logger.warn "Will re-attempt in #{connect_retry_period} seconds" + sleep connect_retry_period retry else disconnect end end @@ -874,17 +849,10 @@ # directory of this project def determine_home_directory control.root end - # Checks whether this process is a Passenger or Unicorn - # spawning server - if so, we probably don't intend to report - # statistics from this process - def is_application_spawner? - $0 =~ /ApplicationSpawner|^unicorn\S* master/ - end - # calls the busy harvester and collects timeslice data to # send later def harvest_timeslice_data(time=Time.now) # this creates timeslices that are harvested below NewRelic::Agent::BusyCalculator.harvest_busy @@ -1069,10 +1037,10 @@ # lifetime of the process # # If this process comes from a parent process, it will not # disconnect, so that the parent process can continue to send data def graceful_disconnect - if @connected + if connected? begin @service.request_timeout = 10 transmit_data(true) if @connected_pid == $$ && !@service.kind_of?(NewRelic::Agent::NewRelicService)