lib/new_relic/agent/agent.rb in newrelic_rpm-2.13.4 vs lib/new_relic/agent/agent.rb in newrelic_rpm-2.13.5.beta1

- old
+ new

@@ -35,10 +35,11 @@ @histogram = NewRelic::Histogram.new(NewRelic::Control.instance.apdex_t / 10) @stats_engine = NewRelic::Agent::StatsEngine.new @transaction_sampler = NewRelic::Agent::TransactionSampler.new @stats_engine.transaction_sampler = @transaction_sampler @error_collector = NewRelic::Agent::ErrorCollector.new + @connect_attempts = 0 @request_timeout = NewRelic::Control.instance.fetch('timeout', 2 * 60) @last_harvest_time = Time.now @obfuscator = method(:default_sql_obfuscator) @@ -92,15 +93,10 @@ error_collector.notice_error e, :uri => options['uri'], :metric => metric end # busy time ? end - # This method is deprecated. Use NewRelic::Agent.manual_start - def manual_start(ignored=nil, also_ignored=nil) - raise "This method no longer supported. Instead use the class method NewRelic::Agent.manual_start" - end - # This method should be called in a forked process after a fork. # It assumes the parent process initialized the agent, but does # not assume the agent started. # # The call is idempotent, but not re-entrant. @@ -224,86 +220,164 @@ end def log NewRelic::Agent.logger end + + # Herein lies the corpse of the former 'start' method. May + # it's unmatched flog score rest in pieces. + module Start + def already_started? + if started? + control.log!("Agent Started Already!", :error) + true + end + end - # Start up the agent. This verifies that the agent_enabled? is - # true and initializes the sampler based on the current - # configuration settings. Then it will fire up the background - # thread for sending data to the server if applicable. - def start - if started? - control.log! "Agent Started Already!", :error - return + def disabled? + !control.agent_enabled? end - return if !control.agent_enabled? - @started = true - @local_host = determine_host + + def log_dispatcher + dispatcher_name = control.dispatcher.to_s + return if log_if(dispatcher_name.empty?, :info, "No dispatcher detected.") + log.info "Dispatcher: #{dispatcher_name}" + end + + def log_app_names + log.info "Application: #{control.app_names.join(", ")}" + end - if control.dispatcher.nil? || control.dispatcher.to_s.empty? - log.info "No dispatcher detected." - else - log.info "Dispatcher: #{control.dispatcher.to_s}" + def apdex_f + (4 * NewRelic::Control.instance.apdex_t).to_f end - log.info "Application: #{control.app_names.join(", ")}" unless control.app_names.empty? - sampler_config = control.fetch('transaction_tracer', {}) - # TODO: Should move this state into the transaction sampler instance - @should_send_samples = @config_should_send_samples = sampler_config.fetch('enabled', true) - @should_send_random_samples = sampler_config.fetch('random_sample', false) - @explain_threshold = sampler_config.fetch('explain_threshold', 0.5).to_f - @explain_enabled = sampler_config.fetch('explain_enabled', true) - @record_sql = sampler_config.fetch('record_sql', :obfuscated).to_sym + def apdex_f_threshold? + sampler_config.fetch('transaction_threshold', '') =~ /apdex_f/i + end - # use transaction_threshold: 4.0 to force the TT collection - # threshold to 4 seconds - # use transaction_threshold: apdex_f to use your apdex t value - # multiplied by 4 - # undefined transaction_threshold defaults to 2.0 - apdex_f = 4 * NewRelic::Control.instance.apdex_t - @slowest_transaction_threshold = sampler_config.fetch('transaction_threshold', 2.0) - if @slowest_transaction_threshold =~ /apdex_f/i - @slowest_transaction_threshold = apdex_f + def set_sql_recording! + @record_sql = sampler_config.fetch('record_sql', :obfuscated).to_sym + log_sql_transmission_warning? end - @slowest_transaction_threshold = @slowest_transaction_threshold.to_f - log.warn "Agent is configured to send raw SQL to RPM service" if @record_sql == :raw + def log_sql_transmission_warning? + log_if((@record_sql == :raw), :warn, "Agent is configured to send raw SQL to RPM service") + end - case - when !control.monitor_mode? - log.warn "Agent configured not to send data in this environment - edit newrelic.yml to change this" - when !control.license_key - log.error "No license key found. Please edit your newrelic.yml file and insert your license key." - when control.license_key.length != 40 - log.error "Invalid license key: #{control.license_key}" - when [:passenger, :unicorn].include?(control.dispatcher) - log.info "Connecting workers after forking." - else - # Do the connect in the foreground if we are in sync mode - NewRelic::Agent.disable_all_tracing { connect(:keep_retrying => false) } if control.sync_startup + def sampler_config + control.fetch('transaction_tracer', {}) + end + + # this entire method should be done on the transaction + # sampler object, rather than here. We should pass in the + # sampler config. + def config_transaction_tracer + @should_send_samples = @config_should_send_samples = sampler_config.fetch('enabled', true) + @should_send_random_samples = sampler_config.fetch('random_sample', false) + @explain_threshold = sampler_config.fetch('explain_threshold', 0.5).to_f + @explain_enabled = sampler_config.fetch('explain_enabled', true) + set_sql_recording! + + # default to 2.0, string 'apdex_f' will turn into your + # apdex * 4 + @slowest_transaction_threshold = sampler_config.fetch('transaction_threshold', 2.0).to_f + @slowest_transaction_threshold = apdex_f if apdex_f_threshold? + end - # Start the event loop and initiate connection if necessary - start_worker_thread + def connect_in_foreground + NewRelic::Agent.disable_all_tracing { connect(:keep_retrying => false) } + end - # Our shutdown handler needs to run after other shutdown handlers - # that may be doing things like running the app (hello sinatra). - if control.send_data_on_exit - if RUBY_VERSION =~ /rubinius/i - list = at_exit { shutdown } - # move the shutdown handler to the front of the list, to - # execute last: - list.unshift(list.pop) - elsif !defined?(JRuby) or !defined?(Sinatra::Application) - at_exit { at_exit { shutdown } } - end + def using_rubinius? + RUBY_VERSION =~ /rubinius/i + end + + def using_jruby? + defined?(JRuby) + end + + def using_sinatra? + defined?(Sinatra::Application) + end + + # we should not set an at_exit block if people are using + # these as they don't do standard at_exit behavior per MRI/YARV + def weird_ruby? + using_rubinius? || using_jruby? || using_sinatra? + end + + def install_exit_handler + if control.send_data_on_exit && !weird_ruby? + # Our shutdown handler needs to run after other shutdown handlers + at_exit { at_exit { shutdown } } end end - log.info "New Relic RPM Agent #{NewRelic::VERSION::STRING} Initialized: pid = #$$" - log.info "Agent Log found in #{NewRelic::Control.instance.log_file}" if NewRelic::Control.instance.log_file + + def notify_log_file_location + log_file = NewRelic::Control.instance.log_file + log_if(log_file, :info, "Agent Log found in #{log_file}") + end + + def log_version_and_pid + log.info "New Relic RPM Agent #{NewRelic::VERSION::STRING} Initialized: pid = #{$$}" + end + + def log_if(boolean, level, message) + self.log.send(level, message) if boolean + boolean + end + + def log_unless(boolean, level, message) + self.log.send(level, message) unless boolean + boolean + end + + def monitoring? + log_unless(control.monitor_mode?, :warn, "Agent configured not to send data in this environment - edit newrelic.yml to change this") + end + + def has_license_key? + log_unless(control.license_key, :error, "No license key found. Please edit your newrelic.yml file and insert your license key.") + end + + def has_correct_license_key? + has_license_key? && correct_license_length + end + + def correct_license_length + key = control.license_key + log_unless((key.length == 40), :error, "Invalid license key: #{key}") + end + + def using_forking_dispatcher? + log_if([:passenger, :unicorn].include?(control.dispatcher), :info, "Connecting workers after forking.") + end + + def check_config_and_start_agent + return unless monitoring? && has_correct_license_key? + return if using_forking_dispatcher? + connect_in_foreground if control.sync_startup + start_worker_thread + install_exit_handler + end end + include Start + + def start + return if already_started? || disabled? + @started = true + @local_host = determine_host + log_dispatcher + log_app_names + config_transaction_tracer + check_config_and_start_agent + log_version_and_pid + notify_log_file_location + end + # Clear out the metric data, errors, and transaction traces. Reset the histogram data. def reset_stats @stats_engine.reset_stats @unsent_errors = [] @traces = nil @@ -316,181 +390,296 @@ private def collector @collector ||= control.server end - # Try to launch the worker thread and connect to the server. - # - # See #connect for a description of connection_options. - def start_worker_thread(connection_options = {}) - log.debug "Creating RPM worker thread." - @worker_thread = Thread.new do - begin + module StartWorkerThread + + def check_transaction_sampler_status + # disable transaction sampling if disabled by the server + # and we're not in dev mode + if control.developer_mode? || @should_send_samples + @transaction_sampler.enable + else + @transaction_sampler.disable + end + end + + def log_worker_loop_start + log.info "Reporting performance data every #{@report_period} seconds." + log.debug "Running worker loop" + end + + def create_and_run_worker_loop + @worker_loop = WorkerLoop.new + @worker_loop.run(@report_period) do + harvest_and_send_timeslice_data + harvest_and_send_slowest_sample if @should_send_samples + harvest_and_send_errors if error_collector.enabled + end + end + + def handle_force_restart(error) + log.info error.message + # disconnect and start over. + # clear the stats engine + reset_stats + @metric_ids = {} + @connected = nil + # Wait a short time before trying to reconnect + sleep 30 + end + + def handle_force_disconnect(error) + # when a disconnect is requested, stop the current thread, which + # is the worker thread that gathers data and talks to the + # server. + log.error "RPM forced this agent to disconnect (#{error.message})" + disconnect + end + + def handle_server_connection_problem(error) + log.error "Unable to establish connection with the server. Run with log level set to debug for more information." + log.debug("#{error.class.name}: #{error.message}\n#{error.backtrace.first}") + disconnect + end + + def handle_other_error(error) + log.error "Terminating worker loop: #{error.class.name}: #{error.message}\n #{error.backtrace.join("\n ")}" + disconnect + end + + def catch_errors + yield + rescue NewRelic::Agent::ForceRestartException => e + handle_force_restart(e) + retry + rescue NewRelic::Agent::ForceDisconnectException => e + handle_force_disconnect(e) + rescue NewRelic::Agent::ServerConnectionException => e + handle_server_connection_problem(e) + rescue Exception => e + handle_other_error(e) + end + + def deferred_work!(connection_options) + catch_errors do NewRelic::Agent.disable_all_tracing do # We try to connect. If this returns false that means # the server rejected us for a licensing reason and we should # just exit the thread. If it returns nil # that means it didn't try to connect because we're in the master. connect(connection_options) if @connected - # disable transaction sampling if disabled by the server and we're not in dev mode - if !control.developer_mode? && !@should_send_samples - @transaction_sampler.disable - else - @transaction_sampler.enable # otherwise ensure TT's are enabled - end - - log.info "Reporting performance data every #{@report_period} seconds." - log.debug "Running worker loop" - # Note if the agent attempts to report more frequently than allowed by the server - # the server will start dropping data. - @worker_loop = WorkerLoop.new - @worker_loop.run(@report_period) do - harvest_and_send_timeslice_data - harvest_and_send_slowest_sample if @should_send_samples - harvest_and_send_errors if error_collector.enabled - end + check_transaction_sampler_status + log_worker_loop_start + create_and_run_worker_loop else - log.debug "No connection. Worker thread finished." + log.debug "No connection. Worker thread ending." end end - rescue NewRelic::Agent::ForceRestartException => e - log.info e.message - # disconnect and start over. - # clear the stats engine - reset_stats - @metric_ids = {} - @connected = nil - # Wait a short time before trying to reconnect - sleep 30 - retry - rescue NewRelic::Agent::ForceDisconnectException => e - # when a disconnect is requested, stop the current thread, which - # is the worker thread that gathers data and talks to the - # server. - log.error "RPM forced this agent to disconnect (#{e.message})" - @connected = false - rescue NewRelic::Agent::ServerConnectionException => e - log.error "Unable to establish connection with the server. Run with log level set to debug for more information." - log.debug("#{e.class.name}: #{e.message}\n#{e.backtrace.first}") - @connected = false - rescue Exception => e - log.error "Terminating worker loop: #{e.class.name}: #{e}\n #{e.backtrace.join("\n ")}" - @connected = false - end # begin + end + end + end + include StartWorkerThread + + # Try to launch the worker thread and connect to the server. + # + # See #connect for a description of connection_options. + def start_worker_thread(connection_options = {}) + log.debug "Creating RPM worker thread." + @worker_thread = Thread.new do + deferred_work!(connection_options) end # thread new @worker_thread['newrelic_label'] = 'Worker Loop' end def control NewRelic::Control.instance end + + module Connect + attr_accessor :connect_retry_period + attr_accessor :connect_attempts - # Connect to the server and validate the license. If successful, - # @connected has true when finished. If not successful, you can - # keep calling this. Return false if we could not establish a - # connection with the server and we should not retry, such as if - # there's a bad license key. - # - # Set keep_retrying=false to disable retrying and return asap, such as when - # invoked in the foreground. Otherwise this runs until a successful - # connection is made, or the server rejects us. - # - # * <tt>:keep_retrying => false</tt> to only try to connect once, and - # return with the connection set to nil. This ensures we may try again - # later (default true). - # * <tt>force_reconnect => true</tt> if you want to establish a new connection - # to the server before running the worker loop. This means you get a separate - # agent run and RPM sees it as a separate instance (default is false). - def connect(options) - # Don't proceed if we already connected (@connected=true) or if we tried - # to connect and were rejected with prejudice because of a license issue - # (@connected=false). - return if !@connected.nil? && !options[:force_reconnect] - keep_retrying = options[:keep_retrying].nil? || options[:keep_retrying] + def disconnect + @connected = false + true + end - # wait a few seconds for the web server to boot, necessary in development - connect_retry_period = keep_retrying ? 10 : 0 - connect_attempts = 0 - @agent_id = nil - begin - sleep connect_retry_period.to_i - log.debug "Connecting Process to RPM: #$0" - host = invoke_remote(:get_redirect_host) - @collector = control.server_from_host(host) if host - environment = control['send_environment_info'] != false ? control.local_env.snapshot : [] - log.debug "Connecting with validation seed/token: #{control.validate_seed}/#{control.validate_token}" if control.validate_seed - connect_data = invoke_remote :connect, + def tried_to_connect?(options) + !(@connected.nil? || options[:force_reconnect]) + end + + def should_keep_retrying?(options) + @keep_retrying = (options[:keep_retrying].nil? || options[:keep_retrying]) + end + + def get_retry_period + return 600 if self.connect_attempts > 6 + connect_attempts * 60 + end + + def increment_retry_period! + self.connect_retry_period=(get_retry_period) + end + + def should_retry? + if @keep_retrying + self.connect_attempts=(connect_attempts + 1) + increment_retry_period! + log.info "Will re-attempt in #{connect_retry_period} seconds" + true + else + disconnect + false + end + end + + def log_error(error) + log.error "Error establishing connection with New Relic RPM Service at #{control.server}: #{error.message}" + log.debug error.backtrace.join("\n") + end + + def handle_license_error(error) + log.error error.message + log.info "Visit NewRelic.com to obtain a valid license key, or to upgrade your account." + disconnect + end + + def log_seed_token + if control.validate_seed + log.debug "Connecting with validation seed/token: #{control.validate_seed}/#{control.validate_token}" + end + end + + def environment_for_connect + control['send_environment_info'] != false ? control.local_env.snapshot : [] + end + + def validate_settings + { + :seed => control.validate_seed, + :token => control.validate_token + } + end + + def connect_settings + { :pid => $$, :host => @local_host, :app_name => control.app_names, :language => 'ruby', :agent_version => NewRelic::VERSION::STRING, - :environment => environment, + :environment => environment_for_connect, :settings => control.settings, - :validate => {:seed => control.validate_seed, - :token => control.validate_token } + :validate => validate_settings + } + end + def connect_to_server + log_seed_token + connect_data = invoke_remote(:connect, connect_settings) + end - @agent_id = connect_data['agent_run_id'] - @report_period = connect_data['data_report_period'] - @url_rules = connect_data['url_rules'] + def configure_error_collector!(server_enabled) + # Ask for permission to collect error data + enabled = if error_collector.config_enabled && server_enabled + error_collector.enabled = true + else + error_collector.enabled = false + end + log.debug "Errors will #{enabled ? '' : 'not '}be sent to the RPM service." + end + + def enable_random_samples!(sample_rate) + @transaction_sampler.random_sampling = true + @transaction_sampler.sampling_rate = sample_rate + log.info "Transaction sampling enabled, rate = #{@transaction_sampler.sampling_rate}" + end + - control.log! "Connected to NewRelic Service at #{@collector}" - log.debug "Agent Run = #{@agent_id}." - log.debug "Connection data = #{connect_data.inspect}" - + def configure_transaction_tracer!(server_enabled, sample_rate) # Ask the server for permission to send transaction samples. # determined by subscription license. - @should_send_samples = @config_should_send_samples && connect_data['collect_traces'] + @should_send_samples = @config_should_send_samples && server_enabled if @should_send_samples - if @should_send_random_samples - @transaction_sampler.random_sampling = true - @transaction_sampler.sampling_rate = connect_data['sampling_rate'] - log.info "Transaction sampling enabled, rate = #{@transaction_sampler.sampling_rate}" - end + # I don't think this is ever true, but... + enable_random_samples!(sample_rate) if @should_send_random_samples log.debug "Transaction tracing threshold is #{@slowest_transaction_threshold} seconds." else log.debug "Transaction traces will not be sent to the RPM service." end + end - # Ask for permission to collect error data - error_collector.enabled = error_collector.config_enabled && connect_data['collect_errors'] + def set_collector_host! + host = invoke_remote(:get_redirect_host) + if host + @collector = control.server_from_host(host) + end + end - log.debug "Errors will be sent to the RPM service." if error_collector.enabled + def query_server_for_configuration + set_collector_host! + + finish_setup(connect_to_server) + end + def finish_setup(config_data) + @agent_id = config_data['agent_run_id'] + @report_period = config_data['data_report_period'] + @url_rules = config_data['url_rules'] - @connected_pid = $$ - @connected = true + log_connection!(config_data) + configure_transaction_tracer!(config_data['collect_traces'], config_data['sample_rate']) + configure_error_collector!(config_data['collect_errors']) + end - rescue NewRelic::Agent::LicenseException => e - log.error e.message - log.info "Visit NewRelic.com to obtain a valid license key, or to upgrade your account." - @connected = false + def log_connection!(config_data) + control.log! "Connected to NewRelic Service at #{@collector}" + log.debug "Agent Run = #{@agent_id}." + log.debug "Connection data = #{config_data.inspect}" + end + end + include Connect - rescue Timeout::Error, StandardError => e - if e.instance_of? NewRelic::Agent::ServerConnectionException - log.info "Unable to establish connection with New Relic RPM Service at #{control.server}: #{e.message}" - log.debug e.backtrace.join("\n") - else - log.error "Error establishing connection with New Relic RPM Service at #{control.server}: #{e.message}" - log.debug e.backtrace.join("\n") - end - # retry logic - if keep_retrying - connect_attempts += 1 - case connect_attempts - when 1..2 - connect_retry_period, period_msg = 60, "1 minute" - when 3..5 - connect_retry_period, period_msg = 60 * 2, "2 minutes" - else - connect_retry_period, period_msg = 5 * 60, "5 minutes" - end - log.info "Will re-attempt in #{period_msg}" - retry - else - @connected = nil - end + # Connect to the server and validate the license. If successful, + # @connected has true when finished. If not successful, you can + # keep calling this. Return false if we could not establish a + # connection with the server and we should not retry, such as if + # there's a bad license key. + # + # Set keep_retrying=false to disable retrying and return asap, such as when + # invoked in the foreground. Otherwise this runs until a successful + # connection is made, or the server rejects us. + # + # * <tt>:keep_retrying => false</tt> to only try to connect once, and + # return with the connection set to nil. This ensures we may try again + # later (default true). + # * <tt>force_reconnect => true</tt> if you want to establish a new connection + # to the server before running the worker loop. This means you get a separate + # agent run and RPM sees it as a separate instance (default is false). + def connect(options) + # Don't proceed if we already connected (@connected=true) or if we tried + # to connect and were rejected with prejudice because of a license issue + # (@connected=false), unless we're forced to by force_reconnect. + return if tried_to_connect?(options) + + # wait a few seconds for the web server to boot, necessary in development + @connect_retry_period = should_keep_retrying?(options) ? 10 : 0 + + sleep connect_retry_period + log.debug "Connecting Process to RPM: #$0" + query_server_for_configuration + @connected_pid = $$ + @connected = true + rescue NewRelic::Agent::LicenseException => e + handle_license_error(e) + rescue Timeout::Error, StandardError => e + log_error(e) + if should_retry? + retry + else + disconnect end end def determine_host Socket.gethostname