lib/perus/server/db.rb in perus-0.1.13 vs lib/perus/server/db.rb in perus-0.1.14
- old
+ new
@@ -3,10 +3,11 @@
require 'concurrent'
module Perus::Server
module DB
# NOTE(review): presumably the retry limit for a vacuum task; its use is
# outside the visible hunks - confirm.
MAX_VACUUM_ATTEMPTS = 5
# added in the new version: number of times the hourly cleanup task calls
# DB.cleanup before giving up and recording 'failed'.
+ MAX_CLEAN_ATTEMPTS = 5
# reader for the module-level @db instance variable. returns whatever was
# last assigned to @db (the assignment is outside the visible hunks), or
# nil if nothing has been assigned yet.
def self.db
@db
end
@@ -78,15 +79,33 @@
# a fixed number of hours of data are kept in the database. once an
# hour, old values and files are removed. if all values of a metric
# are removed from a system, the accompanying metric record is also
# removed.
cleanup_task = Concurrent::TimerTask.new do
# removed (-) lines: one cleanup attempt per run; any StandardError led
# straight to Stats.cleaned!('failed').
# added (+) lines: call DB.cleanup up to MAX_CLEAN_ATTEMPTS times, sleeping
# 5 seconds between failed attempts; 'failed' is only recorded when no
# attempt succeeds.
- begin
- start = Time.now
- Perus::Server::DB.cleanup
- Stats.cleaned!(Time.now - start)
- rescue
+ attempts = 0
+ complete = false
+
+ while !complete && attempts < MAX_CLEAN_ATTEMPTS
+ begin
+ puts "Cleaning, attempt #{attempts + 1}"
+ start = Time.now
+ Perus::Server::DB.cleanup
# the elapsed time of the successful attempt is what gets recorded.
+ Stats.cleaned!(Time.now - start)
+ complete = true
+ puts "Cleaning complete"
+
# bare rescue: catches StandardError and subclasses; the exception itself
# is not printed or recorded, only counted as a failed attempt.
+ rescue
+ attempts += 1
# no sleep after the final failed attempt, since the loop is about to exit.
+ if attempts < MAX_CLEAN_ATTEMPTS
+ puts "Clean failed, will reattempt after short sleep"
+ sleep(5)
+ end
+ end
+ end
+
+ if !complete
+ puts "Clean failed more than MAX_CLEAN_ATTEMPTS"
# shared by both versions: mark this cleanup run as failed in Stats.
Stats.cleaned!('failed')
end
end
# fire every hour
@@ -96,15 +115,17 @@
# alerts can be process intensive, so to keep page refreshes
# responsive the 'active' state of an alert for each system is
# cached so lookups can be done against the db, rather than running
# each alert for each system on a page load.
cache_alerts_task = Concurrent::TimerTask.new do
# removed (-) lines: the alert cache was rebuilt on every run.
# added (+) lines: the same body is wrapped in a Server.ping_queue.empty?
# check, so a run does nothing (and records nothing in Stats) while the
# ping queue still has entries.
- begin
- start = Time.now
- Perus::Server::Alert.cache_active_alerts
- Stats.alerts_cached!(Time.now - start)
- rescue
- Stats.alerts_cached!('failed')
+ if Server.ping_queue.empty?
+ begin
+ start = Time.now
+ Perus::Server::Alert.cache_active_alerts
# record how long the cache rebuild took.
+ Stats.alerts_cached!(Time.now - start)
# bare rescue: any StandardError is recorded as 'failed'; the exception
# itself is discarded.
+ rescue
+ Stats.alerts_cached!('failed')
+ end
end
end
# the run interval is read from the cache_alerts_secs server option.
cache_alerts_task.execution_interval = Server.options.cache_alerts_secs
cache_alerts_task.execute