require 'aws'
require 'insxsync'
require 'cge_backup'
require 'securerandom'
require 'tmpdir'

# Provides a class to interface with Amazon's S3 service to download, extract and process
# MySQL CGE backups from the production system.
#
# Backups are exposed through 1-based ids; id 1 is the oldest complete backup and the
# highest id is the most recent one.
class CGEBackupCollection < ErrorHandlingIface
  # Standard backup filename template. Capture group 1 is the backup time.
  BACKUP_KEY_PATTERN = /BACKUP-(\d*)_\d-[0-9a-f]*\.tar/

  # Configures and returns a CGEBackupCollection object. Connects to AWS S3 and builds a
  # collection of CGEBackup objects, one per pair of backup files sharing the same time.
  def initialize
    # Assume the role on the production account to access the CGE backups.
    sts = AWS::STS.new(:access_key_id => $AWS_ID, :secret_access_key => $AWS_SECRET)
    role = sts.assume_role(:role_arn => $ROLE_ARN,
                           :role_session_name => SecureRandom.uuid.gsub(/-/, ''),
                           :duration_seconds => 60 * 15)

    # Open the CGE backup bucket with the temporary role credentials.
    cge_bucket = AWS::S3.new(role[:credentials]).buckets[$CGE_BUCKET]

    paired_keys = complete_backup_keys(sorted_backup_keys(cge_bucket))

    # Keys are sorted oldest first, so consecutive keys form one backup and the
    # resulting ids count upwards from the oldest backup.
    @backups = paired_keys.each_slice(2).each_with_index.map do |(first_key, second_key), index|
      CGEBackup.new(first_key, second_key, index + 1)
    end
  end

  # Looks up a backup by its 1-based id.
  # @param [Integer] id the id of the backup
  # @return [CGEBackup, nil] the backup stored at position id - 1, or nil if there is none
  def [](id)
    @backups[id - 1]
  end

  # Displays a list of all available backups.
  def list
    # Build the header with the same column widths as the rows so they always line up.
    puts "#{'ID'.center(16)}|#{'Epoch Time'.center(16)}|#{'Date and Time'.center(21)}"
    puts '-------------------------------------------------------'
    @backups.each do |backup|
      puts "#{backup.id.to_s.center(16)}|#{backup.epoch_time.center(16)}|#{backup.date_time.center(21)}"
    end
  end

  # Downloads a backup from S3 into a temporary directory, extracts it and archives it.
  # @overload archive(time)
  #   Processes the latest backup
  #   @param [Time] time the time of the sync point to archive to
  # @overload archive(time, dump_id)
  #   Processes the backup with the specified id
  #   @param [Time] time the time of the sync point to archive to
  #   @param [Integer] dump_id the id of the backup to retrieve
  def archive(time, dump_id=nil)
    failValidation unless dump_id.nil? || dump_id.is_a?(Integer)
    failValidation unless time.is_a?(Time)

    # Default to the latest backup, which carries the highest id.
    id = dump_id.nil? ? @backups.count : dump_id

    # An id outside 1..count would resolve to nil (or wrap around to a different
    # backup for zero/negative ids), so reject it up front.
    failValidation unless id.between?(1, @backups.count)

    # The block form of Dir.mktmpdir removes the directory when the block exits.
    Dir.mktmpdir do |dir|
      backup = self[id]

      print "Downloading CGE backup and extracting...\t\t"
      backup.download(dir)
      backup.extract
      puts "Done."

      print "Uploading database synchronization data...\t\t"
      backup.archive(time)
      puts "Done."
    end
  end

  private

  # Collects every object key in the bucket that matches the backup filename template,
  # ordered by backup time (oldest first) and then by key so the order is deterministic.
  def sorted_backup_keys(bucket)
    keys = []
    bucket.objects.each do |obj|
      keys << obj.key if obj.key =~ BACKUP_KEY_PATTERN
    end
    keys.sort_by { |key| [parse_time(key).to_i, key] }
  end

  # Drops any backup file that stands alone. Every backup should consist of two files
  # (one for each node), so in a time-sorted list a file belongs to a complete backup
  # only if the file before or after it has the same time.
  def complete_backup_keys(sorted_keys)
    times = sorted_keys.map { |key| parse_time(key) }
    paired = sorted_keys.each_index.select do |index|
      (index > 0 && times[index] == times[index - 1]) || times[index] == times[index + 1]
    end
    paired.map { |index| sorted_keys[index] }
  end

  # Extracts the time portion of a backup filename.
  # @param [String] backup the object key of a backup file
  # @return [String, nil] the captured digits, or nil if the key does not match the template
  def parse_time(backup)
    backup[BACKUP_KEY_PATTERN, 1]
  end

  # Registers the usage text for this class on first use, then defers to the superclass.
  def get_usage
    if @usage.nil?
      init_usage
      # NOTE(review): the key is 'archive_dumps' while the public method is named
      # `archive` - confirm which name the usage lookup expects.
      @usage['archive_dumps'] = ['time = Time, dump_id = Integer']
    end

    super
  end
end