lib/perus/server/db.rb in perus-0.1.11 vs lib/perus/server/db.rb in perus-0.1.12
- old
+ new
@@ -2,10 +2,12 @@
require 'sequel/plugins/serialization'
require 'concurrent'
module Perus::Server
module DB
# maximum number of times one scheduled vacuum run tries the vacuum before
# it gives up and reports the run as 'failed'
+ MAX_VACUUM_ATTEMPTS = 5
+
# reader for the database handle kept in the module-level @db instance
# variable; returns nil while @db has not been assigned
class << self
  attr_reader :db
end
def self.start
@@ -40,11 +42,35 @@
# performance rather than reclaim unused space. as old values and
# metrics are deleted the data become very fragmented. vacuuming
# restructures the db so system records in the values index should
# be sequentially stored
vacuum_task = Concurrent::TimerTask.new do
- @db.execute('vacuum')
+ attempts = 0
+ complete = false
+
+ while !complete && attempts < MAX_VACUUM_ATTEMPTS
+ begin
+ puts "Vacuuming, attempt #{attempts + 1}"
+ start = Time.now
+ @db.execute('vacuum')
+ Stats.vacuumed!(Time.now - start)
+ complete = true
+ puts "Vacuuming complete"
+
+ rescue
+ attempts += 1
+ if attempts < MAX_VACUUM_ATTEMPTS
+ puts "Vacuum failed, will reattempt after short sleep"
+ sleep(5)
+ end
+ end
+ end
+
+ if !complete
+ puts "Vacuum failed more than MAX_VACUUM_ATTEMPTS"
+ Stats.vacuumed!('failed')
+ end
end
# fire every 12 hours
vacuum_task.execution_interval = 60 * 60 * 12
vacuum_task.execute
@@ -52,11 +78,17 @@
# a fixed number of hours of data are kept in the database. once an
# hour, old values and files are removed. if all values of a metric
# are removed from a system, the accompanying metric record is also
# removed.
# scheduled cleanup: times Perus::Server::DB.cleanup and passes the elapsed
# time (Time difference, in seconds) to Stats.cleaned!. the bare rescue
# catches any StandardError raised inside the begin block and reports
# 'failed' instead; the exception itself is not printed or re-raised.
cleanup_task = Concurrent::TimerTask.new do
- Perus::Server::DB.cleanup
+ begin
+ start = Time.now
+ Perus::Server::DB.cleanup
+ Stats.cleaned!(Time.now - start)
+ rescue
+ Stats.cleaned!('failed')
+ end
end
# fire every hour
cleanup_task.execution_interval = 60 * 60
cleanup_task.execute
@@ -64,13 +96,19 @@
# alerts can be process intensive, so to keep page refreshes
# responsive the 'active' state of an alert for each system is
# cached so lookups can be done against the db, rather than running
# each alert for each system on a page load.
# scheduled alert caching: times Perus::Server::Alert.cache_active_alerts
# and passes the elapsed time (Time difference, in seconds) to
# Stats.alerts_cached!. the bare rescue catches any StandardError raised
# inside the begin block and reports 'failed' instead; the exception itself
# is not printed or re-raised.
cache_alerts_task = Concurrent::TimerTask.new do
- Perus::Server::Alert.cache_active_alerts
+ begin
+ start = Time.now
+ Perus::Server::Alert.cache_active_alerts
+ Stats.alerts_cached!(Time.now - start)
+ rescue
+ Stats.alerts_cached!('failed')
+ end
end
# the interval is now taken as-is from the cache_alerts_secs option; the
# previous version read cache_alerts_mins and multiplied it by 60
- cache_alerts_task.execution_interval = Server.options.cache_alerts_mins * 60
+ cache_alerts_task.execution_interval = Server.options.cache_alerts_secs
cache_alerts_task.execute
end
def self.cleanup
puts 'Cleaning old values and metrics'