lib/perus/server/db.rb in perus-0.1.13 vs lib/perus/server/db.rb in perus-0.1.14

- old
+ new

@@ -3,10 +3,11 @@ require 'concurrent' module Perus::Server module DB MAX_VACUUM_ATTEMPTS = 5 + MAX_CLEAN_ATTEMPTS = 5 def self.db @db end @@ -78,15 +79,33 @@ # a fixed number of hours of data are kept in the database. once an # hour, old values and files are removed. if all values of a metric # are removed from a system, the accompanying metric record is also # removed. cleanup_task = Concurrent::TimerTask.new do - begin - start = Time.now - Perus::Server::DB.cleanup - Stats.cleaned!(Time.now - start) - rescue + attempts = 0 + complete = false + + while !complete && attempts < MAX_CLEAN_ATTEMPTS + begin + puts "Cleaning, attempt #{attempts + 1}" + start = Time.now + Perus::Server::DB.cleanup + Stats.cleaned!(Time.now - start) + complete = true + puts "Cleaning complete" + + rescue + attempts += 1 + if attempts < MAX_CLEAN_ATTEMPTS + puts "Clean failed, will reattempt after short sleep" + sleep(5) + end + end + end + + if !complete + puts "Clean failed more than MAX_CLEAN_ATTEMPTS" Stats.cleaned!('failed') end end # fire every hour @@ -96,15 +115,17 @@ # alerts can be process intensive, so to keep page refreshes # responsive the 'active' state of an alert for each system is # cached so lookups can be done against the db, rather than running # each alert for each system on a page load. cache_alerts_task = Concurrent::TimerTask.new do - begin - start = Time.now - Perus::Server::Alert.cache_active_alerts - Stats.alerts_cached!(Time.now - start) - rescue - Stats.alerts_cached!('failed') + if Server.ping_queue.empty? + begin + start = Time.now + Perus::Server::Alert.cache_active_alerts + Stats.alerts_cached!(Time.now - start) + rescue + Stats.alerts_cached!('failed') + end end end cache_alerts_task.execution_interval = Server.options.cache_alerts_secs cache_alerts_task.execute