lib/new_relic/agent/agent.rb in ghazel-newrelic_rpm-3.4.0.2 vs lib/new_relic/agent/agent.rb in ghazel-newrelic_rpm-3.5.4

- old
+ new

@@ -4,60 +4,78 @@ require 'logger' require 'zlib' require 'stringio' require 'new_relic/agent/new_relic_service' require 'new_relic/agent/pipe_service' +require 'new_relic/agent/configuration/manager' +require 'new_relic/agent/database' +require 'new_relic/agent/thread_profiler' module NewRelic module Agent # The Agent is a singleton that is instantiated when the plugin is # activated. It collects performance data from ruby applications # in realtime as the application runs, and periodically sends that # data to the NewRelic server. class Agent + extend NewRelic::Agent::Configuration::Instance + def initialize @launch_time = Time.now @metric_ids = {} @stats_engine = NewRelic::Agent::StatsEngine.new @transaction_sampler = NewRelic::Agent::TransactionSampler.new @sql_sampler = NewRelic::Agent::SqlSampler.new - @stats_engine.transaction_sampler = @transaction_sampler + @thread_profiler = NewRelic::Agent::ThreadProfiler.new @error_collector = NewRelic::Agent::ErrorCollector.new @connect_attempts = 0 @last_harvest_time = Time.now @obfuscator = lambda {|sql| NewRelic::Agent::Database.default_sql_obfuscator(sql) } @forked = false # FIXME: temporary work around for RUBY-839 - if control.monitor_mode? - @service = NewRelic::Agent::NewRelicService.new(control.license_key, control.server) + if Agent.config[:monitor_mode] + @service = NewRelic::Agent::NewRelicService.new end + + txn_tracer_enabler = Proc.new do + if NewRelic::Agent.config[:'transaction_tracer.enabled'] || + NewRelic::Agent.config[:developer_mode] + @stats_engine.transaction_sampler = @transaction_sampler + else + @stats_engine.transaction_sampler = nil + end + end + Agent.config.register_callback(:'transaction_tracer.enabled', &txn_tracer_enabler) + Agent.config.register_callback(:developer_mode, &txn_tracer_enabler) end # contains all the class-level methods for NewRelic::Agent::Agent module ClassMethods # Should only be called by NewRelic::Control - returns a # memoized singleton instance of the agent, creating one if needed def instance @instance ||= self.new end end - + # Holds all the methods defined on NewRelic::Agent::Agent # instances module InstanceMethods - + # holds a proc that is used to obfuscate sql statements attr_reader :obfuscator # the statistics engine that holds all the timeslice data attr_reader :stats_engine # the transaction sampler that handles recording transactions attr_reader :transaction_sampler attr_reader :sql_sampler + # begins a thread profile session when instructed by agent commands + attr_reader :thread_profiler # error collector is a simple collection of recorded errors attr_reader :error_collector # whether we should record raw, obfuscated, or no sql attr_reader :record_sql # a cached set of metric_ids to save the collector some time - @@ -70,34 +88,34 @@ # a configuration for the Real User Monitoring system - # handles things like static setup of the header for inclusion # into pages attr_reader :beacon_configuration attr_accessor :service - + # Returns the length of the unsent errors array, if it exists, # otherwise nil def unsent_errors_size @unsent_errors.length if @unsent_errors end - + # Returns the length of the traces array, if it exists, # otherwise nil def unsent_traces_size @traces.length if @traces end - + # Initializes the unsent timeslice data hash, if needed, and # returns the number of keys it contains def unsent_timeslice_data @unsent_timeslice_data ||= {} @unsent_timeslice_data.keys.length end # fakes out a transaction that did not happen in this process # by creating apdex, summary metrics, and recording statistics # for the transaction - # + # # This method is *deprecated* - it may be removed in future # versions of the agent def record_transaction(duration_seconds, options={}) is_error = options['is_error'] || options['error_message'] || options['exception'] metric = options['metric'] @@ -148,41 +166,43 @@ # * <tt>:keep_retrying => false</tt> if we try to initiate a new # connection, this tells me to only try it once so this method returns # quickly if there is some kind of latency with the server. def after_fork(options={}) @forked = true + Agent.config.apply_config(NewRelic::Agent::Configuration::ManualSource.new(options), 1) + # @connected gets false after we fail to connect or have an error # connecting. @connected has nil if we haven't finished trying to connect. # or we didn't attempt a connection because this is the master process - + if channel_id = options[:report_to_channel] @service = NewRelic::Agent::PipeService.new(channel_id) @connected_pid = $$ @metric_ids = {} end - + # log.debug "Agent received after_fork notice in #$$: [#{control.agent_enabled?}; monitor=#{control.monitor_mode?}; connected: #{@connected.inspect}; thread=#{@worker_thread.inspect}]" - return if !control.agent_enabled? or - !control.monitor_mode? or - @connected == false or + return if !Agent.config[:agent_enabled] || + !Agent.config[:monitor_mode] || + @connected == false || @worker_thread && @worker_thread.alive? - log.info "Starting the worker thread in #$$ after forking." + log.info "Starting the worker thread in #{$$} after forking." # Clear out stats that are left over from parent process reset_stats # Don't ever check to see if this is a spawner. If we're in a forked process # I'm pretty sure we're not also forking new instances. start_worker_thread(options) @stats_engine.start_sampler_thread end - + def forked? @forked end - + # True if we have initialized and completed 'start' def started? @started end @@ -197,11 +217,11 @@ # # Options: # :force_send => (true/false) # force the agent to send data # before shutting down def shutdown(options={}) - run_loop_before_exit = options.fetch(:force_send, false) + run_loop_before_exit = Agent.config[:force_send] return if not started? if @worker_loop @worker_loop.run_task if run_loop_before_exit @worker_loop.stop end @@ -209,11 +229,11 @@ log.debug "Starting Agent shutdown" # if litespeed, then ignore all future SIGUSR1 - it's # litespeed trying to shut us down - if control.dispatcher == :litespeed + if Agent.config[:dispatcher] == :litespeed Signal.trap("SIGUSR1", "IGNORE") Signal.trap("SIGTERM", "IGNORE") end begin @@ -222,10 +242,14 @@ end rescue => e log.error e log.error e.backtrace.join("\n") end + NewRelic::Agent.config.remove_config do |config| + config.class == NewRelic::Agent::Configuration::ManualSource || + config.class == NewRelic::Agent::Configuration::ServerSource + end @started = nil end # Tells the statistics engine we are starting a new transaction def start_transaction @@ -278,11 +302,11 @@ def log NewRelic::Agent.logger end # Herein lies the corpse of the former 'start' method. May - # it's unmatched flog score rest in pieces. + # its unmatched flog score rest in pieces. module Start # Check whether we have already started, which is an error condition def already_started? if started? control.log!("Agent Started Already!", :error) @@ -292,27 +316,32 @@ # The agent is disabled when it is not force enabled by the # 'agent_enabled' option (e.g. in a manual start), or # enabled normally through the configuration file def disabled? - !control.agent_enabled? + !Agent.config[:agent_enabled] end # Logs the dispatcher to the log file to assist with # debugging. When no debugger is present, logs this fact to # assist with proper dispatcher detection def log_dispatcher - dispatcher_name = control.dispatcher.to_s + dispatcher_name = Agent.config[:dispatcher].to_s return if log_if(dispatcher_name.empty?, :info, "No dispatcher detected.") log.info "Dispatcher: #{dispatcher_name}" end # Logs the configured application names def log_app_names - log.info "Application: #{control.app_names.join(", ")}" + names = Agent.config.app_names + if names.respond_to?(:any?) && names.any? + log.info "Application: #{names.join(", ")}" + else + log.error 'Unable to determine application name. Please set the application name in your newrelic.yml or in a NEW_RELIC_APP_NAME environment variable.' + end end - + # Connecting in the foreground blocks further startup of the # agent until we have a connection - useful in cases where # you're trying to log a very-short-running process and want # to get statistics from before a server connection # (typically 20 seconds) exists @@ -336,13 +365,24 @@ # Installs our exit handler, which exploits the weird # behavior of at_exit blocks to make sure it runs last, by # doing an at_exit within an at_exit block. def install_exit_handler - if control.send_data_on_exit && !weird_ruby? - # Our shutdown handler needs to run after other shutdown handlers - at_exit { at_exit { shutdown } } + if Agent.config[:send_data_on_exit] && !weird_ruby? + at_exit do + # Workaround for MRI 1.9 bug that loses exit codes in at_exit blocks. + # This is necessary to get correct exit codes for the agent's + # test suites. + # http://bugs.ruby-lang.org/issues/5218 + if defined?(RUBY_ENGINE) && RUBY_ENGINE == "ruby" && RUBY_VERSION.match(/^1\.9/) + exit_status = $!.status if $!.is_a?(SystemExit) + shutdown + exit exit_status if exit_status + else + shutdown + end + end end end # Tells us in the log file where the log file is # located. This seems redundant, but can come in handy when @@ -378,45 +418,48 @@ end # Warn the user if they have configured their agent not to # send data, that way we can see this clearly in the log file def monitoring? - log_unless(control.monitor_mode?, :warn, "Agent configured not to send data in this environment - edit newrelic.yml to change this") + log_unless(Agent.config[:monitor_mode], :warn, + "Agent configured not to send data in this environment.") end # Tell the user when the license key is missing so they can # fix it by adding it to the file def has_license_key? - log_unless(control.license_key, :error, "No license key found. Please edit your newrelic.yml file and insert your license key.") + log_unless(Agent.config[:license_key], :warn, + "No license key found in newrelic.yml config.") end # A correct license key exists and is of the proper length def has_correct_license_key? has_license_key? && correct_license_length end # A license key is an arbitrary 40 character string, # usually looks something like a SHA1 hash def correct_license_length - key = control.license_key + key = Agent.config[:license_key] log_unless((key.length == 40), :error, "Invalid license key: #{key}") end # If we're using a dispatcher that forks before serving # requests, we need to wait until the children are forked # before connecting, otherwise the parent process sends odd data def using_forking_dispatcher? - log_if([:passenger, :unicorn, :rainbows].include?(control.dispatcher), :info, "Connecting workers after forking.") + log_if([:passenger, :unicorn, :rainbows].include?(Agent.config[:dispatcher]), + :info, "Connecting workers after forking.") end # Sanity-check the agent configuration and start the agent, # setting up the worker thread and the exit handler to shut # down the agent def check_config_and_start_agent return unless monitoring? && has_correct_license_key? return if using_forking_dispatcher? - connect_in_foreground if control.sync_startup + connect_in_foreground if Agent.config[:sync_startup] start_worker_thread install_exit_handler end end @@ -427,11 +470,10 @@ return if already_started? || disabled? @started = true @local_host = determine_host log_dispatcher log_app_names - config_transaction_tracer check_config_and_start_agent log_version_and_pid notify_log_file_location end @@ -445,48 +487,27 @@ @last_harvest_time = Time.now @launch_time = Time.now end private - + # All of this module used to be contained in the # start_worker_thread method - this is an artifact of # refactoring and can be moved, renamed, etc at will module StartWorkerThread - - # disable transaction sampling if disabled by the server - # and we're not in dev mode - def check_transaction_sampler_status - if control.developer_mode? || @should_send_samples - @transaction_sampler.enable - else - @transaction_sampler.disable - end - end - - def check_sql_sampler_status - # disable sql sampling if disabled by the server - # and we're not in dev mode - if @sql_sampler.config.fetch('enabled', true) && ['raw', 'obfuscated'].include?(@sql_sampler.config.fetch('record_sql', 'obfuscated').to_s) && @transaction_sampler.config.fetch('enabled', true) - @sql_sampler.enable - else - @sql_sampler.disable - end - end - # logs info about the worker loop so users can see when the # agent actually begins running in the background def log_worker_loop_start - log.info "Reporting performance data every #{@report_period} seconds." + log.info "Reporting performance data every #{Agent.config[:data_report_period]} seconds." log.debug "Running worker loop" end # Creates the worker loop and loads it with the instructions # it should run every @report_period seconds def create_and_run_worker_loop @worker_loop = WorkerLoop.new - @worker_loop.run(@report_period) do + @worker_loop.run(Agent.config[:data_report_period]) do transmit_data end end # Handles the case where the server tells us to restart - @@ -555,12 +576,10 @@ # the server rejected us for a licensing reason and we should # just exit the thread. If it returns nil # that means it didn't try to connect because we're in the master. connect(connection_options) if @connected - check_transaction_sampler_status - check_sql_sampler_status log_worker_loop_start create_and_run_worker_loop # never reaches here unless there is a problem or # the agent is exiting else @@ -575,21 +594,20 @@ # Try to launch the worker thread and connect to the server. # # See #connect for a description of connection_options. def start_worker_thread(connection_options = {}) log.debug "Creating Ruby Agent worker thread." - @worker_thread = Thread.new do + @worker_thread = NewRelic::Agent::Thread.new('Worker Loop') do deferred_work!(connection_options) - end # thread new - @worker_thread['newrelic_label'] = 'Worker Loop' + end end # A shorthand for NewRelic::Control.instance def control NewRelic::Control.instance end - + # This module is an artifact of a refactoring of the connect # method - all of its methods are used in that context, so it # can be refactored at will. It should be fully tested module Connect # the frequency with which we should try to connect to the @@ -671,43 +689,43 @@ # If we are using a seed and token to validate the agent, we # should debug log that fact so that debug logs include a # clue that token authentication is what will be used def log_seed_token - if control.validate_seed - log.debug "Connecting with validation seed/token: #{control.validate_seed}/#{control.validate_token}" + if Agent.config[:validate_seed] + log.debug "Connecting with validation seed/token: #{Agent.config[:validate_seed]}/#{Agent.config[:validate_token]}" end end # Checks whether we should send environment info, and if so, # returns the snapshot from the local environment def environment_for_connect - control['send_environment_info'] != false ? control.local_env.snapshot : [] + Agent.config[:send_environment_info] ? Control.instance.local_env.snapshot : [] end # These validation settings are used for cases where a # dynamic server is spun up for clients - partners can # include a seed and token to indicate that the host is # allowed to connect, rather than setting a unique hostname def validate_settings { - :seed => control.validate_seed, - :token => control.validate_token + :seed => Agent.config[:validate_seed], + :token => Agent.config[:validate_token] } end # Initializes the hash of settings that we send to the # server. Returns a literal hash containing the options def connect_settings { :pid => $$, :host => @local_host, - :app_name => control.app_names, + :app_name => Agent.config.app_names, :language => 'ruby', :agent_version => NewRelic::VERSION::STRING, :environment => environment_for_connect, - :settings => control.settings, + :settings => Agent.config.flattened_config, :validate => validate_settings } end # Does some simple logging to make sure that our seed and @@ -716,131 +734,15 @@ def connect_to_server log_seed_token @service.connect(connect_settings) end - # Configures the error collector if the server says that we - # are allowed to send errors. Pretty simple, and logs at - # debug whether errors will or will not be sent. - def configure_error_collector!(server_enabled) - # Reinitialize the error collector - @error_collector = NewRelic::Agent::ErrorCollector.new - # Ask for permission to collect error data - enabled = if error_collector.config_enabled && server_enabled - error_collector.enabled = true - else - error_collector.enabled = false - end - log.debug "Errors will #{enabled ? '' : 'not '}be sent to the New Relic service." - end - - # Random sampling is enabled based on a sample rate, which - # is the n in "every 1/n transactions is added regardless of - # its length". - # - # uses a sane default for sampling rate if the sampling rate - # is zero, since the collector currently sends '0' as a - # sampling rate for all accounts, which is probably for - # legacy reasons - def enable_random_samples!(sample_rate) - sample_rate = 10 unless sample_rate.to_i > 0 - @transaction_sampler.random_sampling = true - @transaction_sampler.sampling_rate = sample_rate - log.info "Transaction sampling enabled, rate = #{@transaction_sampler.sampling_rate}" - end - - # this entire method should be done on the transaction - # sampler object, rather than here. We should pass in the - # sampler config. - def config_transaction_tracer - # Reconfigure the transaction tracer - @transaction_sampler.configure! - @sql_sampler.configure! - @should_send_samples = @config_should_send_samples = @transaction_sampler.config.fetch('enabled', true) - @should_send_random_samples = @transaction_sampler.config.fetch('random_sample', false) - set_sql_recording! - - # default to 2.0, string 'apdex_f' will turn into your - # apdex * 4 - @slowest_transaction_threshold = @transaction_sampler.config.fetch('transaction_threshold', 2.0).to_f - @slowest_transaction_threshold = apdex_f if apdex_f_threshold? - end - - # Enables or disables the transaction tracer and sets its - # options based on the options provided to the - # method. - def configure_transaction_tracer!(server_enabled, sample_rate) - # Ask the server for permission to send transaction samples. - # determined by subscription license. - @transaction_sampler.config['enabled'] = server_enabled - @sql_sampler.configure! - @should_send_samples = @config_should_send_samples && server_enabled - - if @should_send_samples - # I don't think this is ever true, but... - enable_random_samples!(sample_rate) if @should_send_random_samples - - @transaction_sampler.slow_capture_threshold = @slowest_transaction_threshold - - log.debug "Transaction tracing threshold is #{@slowest_transaction_threshold} seconds." - else - log.debug "Transaction traces will not be sent to the New Relic service." - end - end - # apdex_f is always 4 times the apdex_t def apdex_f - (4 * NewRelic::Control.instance.apdex_t).to_f + (4 * Agent.config[:apdex_t]).to_f end - # If the transaction threshold is set to the string - # 'apdex_f', we use 4 times the apdex_t value to record - # transactions. This gears well with using apdex since you - # will attempt to send any transactions that register as 'failing' - def apdex_f_threshold? - @transaction_sampler.config.fetch('transaction_threshold', '') =~ /apdex_f/i - end - - # Sets the sql recording configuration by trying to detect - # any attempt to disable the sql collection - 'off', - # 'false', 'none', and friends. Otherwise, we accept 'raw', - # and unrecognized values default to 'obfuscated' - def set_sql_recording! - record_sql_config = @transaction_sampler.config.fetch('record_sql', :obfuscated) - case record_sql_config.to_s - when 'off' - @record_sql = :off - when 'none' - @record_sql = :off - when 'false' - @record_sql = :off - when 'raw' - @record_sql = :raw - else - @record_sql = :obfuscated - end - - log_sql_transmission_warning? - end - - # Warn the user when we are sending raw sql across the wire - # - they should probably be using ssl when this is true - def log_sql_transmission_warning? - log.warn("Agent is configured to send raw SQL to the service") if @record_sql == :raw - end - - # Asks the collector to tell us which sub-collector we - # should be reporting to, and then does the name resolution - # on that host so we don't block on DNS during the normal - # course of agent processing -# def set_collector_host! -# host = invoke_remote(:get_redirect_host) -# if host -# @collector = control.server_from_host(host) -# end -# end - # Sets the collector host and connects to the server, then # invokes the final configuration with the returned data def query_server_for_configuration finish_setup(connect_to_server) end @@ -852,35 +754,41 @@ # # Can accommodate most arbitrary data - anything extra is # ignored unless we say to do something with it here. def finish_setup(config_data) return if config_data == nil - @service.agent_id = config_data['agent_run_id'] - @report_period = config_data['data_report_period'] - @url_rules = config_data['url_rules'] - @beacon_configuration = BeaconConfiguration.new(config_data) - @server_side_config_enabled = config_data['listen_to_server_config'] - if @server_side_config_enabled + @service.agent_id = config_data['agent_run_id'] if @service + + if config_data['agent_config'] log.info "Using config from server" - log.debug "Server provided config: #{config_data.inspect}" end - control.merge_server_side_config(config_data) if @server_side_config_enabled - config_transaction_tracer - log_connection!(config_data) - configure_transaction_tracer!(config_data['collect_traces'], config_data['sample_rate']) - configure_error_collector!(config_data['collect_errors']) + log.debug "Server provided config: #{config_data.inspect}" + server_config = NewRelic::Agent::Configuration::ServerSource.new(config_data) + Agent.config.apply_config(server_config, 1) + log_connection!(config_data) if @service + + @beacon_configuration = BeaconConfiguration.new end - + # Logs when we connect to the server, for debugging purposes # - makes sure we know if an agent has not connected def log_connection!(config_data) - control.log! "Connected to NewRelic Service at #{@service.collector.name}" + log.info "Connected to NewRelic Service at #{@service.collector.name}" log.debug "Agent Run = #{@service.agent_id}." log.debug "Connection data = #{config_data.inspect}" + if config_data['messages'] && config_data['messages'].any? + log_collector_messages(config_data['messages']) + end end + + def log_collector_messages(messages) + messages.each do |message| + log.send(message['level'].downcase.to_sym, message['message']) + end + end end include Connect # Serialize all the important data that the agent might want @@ -997,11 +905,13 @@ # takes an array of arrays of spec and id, adds it into the # metric cache so we can save the collector some work by # sending integers instead of strings def fill_metric_id_cache(pairs_of_specs_and_ids) - Array(pairs_of_specs_and_ids).each do |metric_spec, metric_id| + Array(pairs_of_specs_and_ids).each do |metric_spec_hash, metric_id| + metric_spec = MetricSpec.new(metric_spec_hash['name'], + metric_spec_hash['scope']) @metric_ids[metric_spec] = metric_id end end # note - exceptions are logged in invoke_remote. If an exception is encountered here, @@ -1010,15 +920,20 @@ def harvest_and_send_timeslice_data now = Time.now NewRelic::Agent.instance.stats_engine.get_stats_no_scope('Supportability/invoke_remote').record_data_point(0.0) NewRelic::Agent.instance.stats_engine.get_stats_no_scope('Supportability/invoke_remote/metric_data').record_data_point(0.0) harvest_timeslice_data(now) - # In this version of the protocol, we get back an assoc array of spec to id. - metric_specs_and_ids = @service.metric_data(@last_harvest_time.to_f, - now.to_f, - @unsent_timeslice_data.values) - metric_specs_and_ids ||= [] + # In this version of the protocol + # we get back an assoc array of spec to id. + metric_specs_and_ids = [] + begin + metric_specs_and_ids = @service.metric_data(@last_harvest_time.to_f, + now.to_f, + @unsent_timeslice_data.values) + rescue UnrecoverableServerException => e + log.debug e.message + end fill_metric_id_cache(metric_specs_and_ids) log.debug "#{now}: sent #{@unsent_timeslice_data.length} timeslices (#{@service.agent_id}) in #{Time.now - now} seconds" # if we successfully invoked this web service, then clear the unsent message cache. @@ -1028,23 +943,26 @@ # Fills the traces array with the harvested transactions from # the transaction sampler, subject to the setting for slowest # transaction threshold def harvest_transaction_traces - @traces = @transaction_sampler.harvest(@traces, @slowest_transaction_threshold) + @traces = @transaction_sampler.harvest(@traces) @traces end def harvest_and_send_slowest_sql # FIXME add the code to try to resend if our connection is down sql_traces = @sql_sampler.harvest unless sql_traces.empty? log.debug "Sending (#{sql_traces.size}) sql traces" begin @service.sql_trace_data(sql_traces) - rescue - @sql_sampler.merge sql_traces + rescue UnrecoverableServerException => e + log.debug e.message + rescue => e + log.debug "Remerging SQL traces after #{e.class.name}: #{e.message}" + @sql_sampler.merge sql_traces end end end # This handles getting the transaction traces and then sending @@ -1056,34 +974,44 @@ def harvest_and_send_slowest_sample harvest_transaction_traces unless @traces.empty? now = Time.now log.debug "Sending (#{@traces.length}) transaction traces" - + begin options = { :keep_backtraces => true } - options[:record_sql] = @record_sql unless @record_sql == :off - if @transaction_sampler.explain_enabled - options[:explain_sql] = @transaction_sampler.explain_threshold + if !(NewRelic::Agent::Database.record_sql_method == :off) + options[:record_sql] = NewRelic::Agent::Database.record_sql_method end - traces = @traces.collect {|trace| trace.prepare_to_send(options)} + if Agent.config[:'transaction_tracer.explain_enabled'] + options[:explain_sql] = Agent.config[:'transaction_tracer.explain_threshold'] + end + traces = @traces.map {|trace| trace.prepare_to_send(options) } @service.transaction_sample_data(traces) - rescue PostTooBigException - # we tried to send too much data, drop the first trace and - # try again - retry if @traces.shift + log.debug "Sent slowest sample (#{@service.agent_id}) in #{Time.now - now} seconds" + rescue UnrecoverableServerException => e + log.debug e.message end - - log.debug "Sent slowest sample (#{@service.agent_id}) in #{Time.now - now} seconds" end # if we succeed sending this sample, then we don't need to keep # the slowest sample around - it has been sent already and we # can clear the collection and move on @traces = nil end + def harvest_and_send_thread_profile(disconnecting=false) + @thread_profiler.stop(true) if disconnecting + + if @thread_profiler.finished? + profile = @thread_profiler.harvest + + log.debug "Sending thread profile #{profile.profile_id}" + @service.profile_data(profile) + end + end + # Gets the collection of unsent errors from the error # collector. We pass back in an existing array of errors that # may be left over from a previous send def harvest_errors @unsent_errors = @error_collector.harvest_errors(@unsent_errors) @@ -1098,35 +1026,64 @@ harvest_errors if @unsent_errors && @unsent_errors.length > 0 log.debug "Sending #{@unsent_errors.length} errors" begin @service.error_data(@unsent_errors) - rescue PostTooBigException - @unsent_errors.shift - retry + rescue UnrecoverableServerException => e + log.debug e.message end # if the remote invocation fails, then we never clear # @unsent_errors, and therefore we can re-attempt to send on # the next heartbeat. Note the error collector maxes out at # 20 instances to prevent leakage @unsent_errors = [] end end - - def transmit_data + + # Only JSON marshalling appears to work with collector on + # get_agent_commands and agent_command_results. We only support + # these features on Ruby versions that can hack JSON out of the box + def agent_commands_supported? + RUBY_VERSION >= "1.9.2" + end + + def check_for_agent_commands + if !agent_commands_supported? + log.debug("Skipping agent commands, as they aren't supported on this environment") + return + end + + commands = @service.get_agent_commands + log.debug "Received get_agent_commands = #{commands}" + + @thread_profiler.respond_to_commands(commands) do |command_id, error| + @service.agent_command_results(command_id, error) + end + end + + def transmit_data(disconnecting=false) + now = Time.now log.debug "Sending data to New Relic Service" harvest_and_send_errors harvest_and_send_slowest_sample harvest_and_send_slowest_sql harvest_and_send_timeslice_data + harvest_and_send_thread_profile(disconnecting) + + check_for_agent_commands rescue => e retry_count ||= 0 retry_count += 1 - retry unless retry_count > 1 + if retry_count <= 1 + log.debug "retrying transmit_data after #{e}" + retry + end raise e ensure NewRelic::Agent::Database.close_connections unless forked? + @stats_engine.get_stats_no_scope('Supportability/Harvest') \ + .record_data_point((Time.now - now).to_f) end # This method contacts the server to send remaining data and # let the server know that the agent is shutting down - this # allows us to do things like accurately set the end of the @@ -1136,25 +1093,27 @@ # disconnect, so that the parent process can continue to send data def graceful_disconnect if @connected begin @service.request_timeout = 10 - transmit_data + transmit_data(true) + if @connected_pid == $$ && !@service.kind_of?(NewRelic::Agent::NewRelicService) log.debug "Sending New Relic service agent run shutdown message" @service.shutdown(Time.now.to_f) else log.debug "This agent connected from parent process #{@connected_pid}--not sending shutdown" end log.debug "Graceful disconnect complete" - rescue Timeout::Error, StandardError + rescue Timeout::Error, StandardError => e + log.debug "Error when disconnecting #{e.class.name}: #{e.message}" end else log.debug "Bypassing graceful disconnect - agent not connected" end end end - + extend ClassMethods include InstanceMethods include BrowserMonitoring end end