# s3sync - Tool belt for managing your S3 buckets
#
# The MIT License (MIT)
#
# Copyright (c) 2013 Lincoln de Sousa
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.

# Part of this software was inspired by the original s3sync, so here's their
# copyright notice:
#
# (c) 2007 s3sync.net
#
# This software code is made available "AS IS" without warranties of any
# kind. You may copy, display, modify and redistribute the software
# code either by itself or as incorporated into your code; provided that
# you do not remove any proprietary notices. Your use of this software
# code is at your own risk and you waive any claim against the author
# with respect to your use of this software code.

require 'digest/md5'
require 'find'
require 'fileutils'
require 'pathname'
require 's3sync/util'

module S3Sync
  class Location
    attr_accessor :path
    attr_accessor :bucket

    def initialize path, bucket=nil
      raise RuntimeError if path.nil?
      @path = path
      @bucket = bucket
    end

    def to_s
      out = []
      out << @bucket unless @bucket.nil?
      out << @path
      out.join ':'
    end

    def local?
      @bucket.nil?
    end

    def == other
      @path == other.path and @bucket == other.bucket
    end

    alias eql? ==
  end

  class Node
    include Comparable

    SMALL_FILE = 50 * 1024 # 50.kilobytes

    attr_accessor :base
    attr_accessor :path
    attr_accessor :size
    attr_accessor :small_comparator

    def initialize base, path, size, small_comparator = nil
      @base = base
      @path = path
      @size = size
      @small_comparator = small_comparator
    end

    def full
      S3Sync.safe_join [@base, @path]
    end

    def == other
      @size == other.size && compare_small_comparators(other)
    end

    alias eql? ==

    private

    # If files are small and both nodes have a comparator, we can call an extra
    # provided block to verify equality. This allows comparing the content of
    # small files (an MD5 digest locally, the etag on S3) without paying that
    # cost for files bigger than SMALL_FILE, which are compared by size alone.
    def compare_small_comparators(other)
      return true if @size > SMALL_FILE || other.size > SMALL_FILE
      return true if small_comparator.nil? || other.small_comparator.nil?

      small_comparator.call == other.small_comparator.call
    end
  end
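  # A minimal sketch of how node equality behaves, with made-up values (this
  # block is illustrative only and not part of the library):
  #
  #   digest = lambda { Digest::MD5.hexdigest File.read('/tmp/src/a.txt') }
  #   local  = Node.new('/tmp/src', 'a.txt', 12, digest)
  #   remote = Node.new('prefix', 'a.txt', 12, lambda { 'd41d8cd98f00b204e9800998ecf8427e' })
  #
  #   local == remote  # => true only if the sizes match and, both files being
  #                    #    smaller than SMALL_FILE, the digests match too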
  class LocalDirectory
    attr_accessor :source

    def initialize source
      @source = source
    end

    def list_files
      nodes = {}

      # Create the directory if it does not exist
      if not File.exist?(@source)
        FileUtils.mkdir_p(@source)
        return nodes
      end

      # The path declared in `@source` exists, yay! Now we need to read the
      # whole directory and add all the readable files to the `nodes` hash.
      Find.find(@source) do |file|
        begin
          st = File.stat file # Might fail
          raise if not st.readable? # We're not interested in things we can't read
        rescue
          $stderr.puts "WARNING: Skipping unreadable file #{file}"
          Find.prune
        end

        # We don't support following symlinks for now, we don't need to follow
        # folders and I don't think we care about any other thing, right?
        next unless st.file?

        # We only need the relative path here
        file_name = file.gsub(/^#{@source}\/?/, '').squeeze('/')
        small_comparator = lambda { Digest::MD5.hexdigest File.read(file) }
        node = Node.new(@source.squeeze('/'), file_name, st.size, small_comparator)
        nodes[node.path] = node
      end

      return nodes
    end
  end

  class SyncCommand
    def self.cmp hash1, hash2
      same, to_add_to_2 = [], []

      hash1.each do |key, value|
        value2 = hash2.delete key
        if value2.nil?
          to_add_to_2 << value
        elsif value2 == value
          same << value
        else
          to_add_to_2 << value
        end
      end

      to_remove_from_2 = hash2.values

      [same, to_add_to_2, to_remove_from_2]
    end

    def initialize args, source, destination
      @args = args
      @source = source
      @destination = destination
    end

    def run
      # Reading the source and destination using our helper method
      if (source, destination = self.class.parse_params [@source, @destination]).nil?
        raise WrongUsage.new(nil, 'Need a source and a destination')
      end

      # Getting the trees
      source_tree, destination_tree = read_trees source, destination

      # Getting the list of resources to be exchanged between the two peers
      _, to_add, to_remove = self.class.cmp source_tree, destination_tree

      # Removing the items matching the exclude pattern if requested
      to_add.select! { |e|
        begin
          (e.path =~ /#{@args.exclude}/).nil?
        rescue RegexpError => exc
          raise WrongUsage.new nil, exc.message
        end
      } if @args.exclude

      # Calling the methods that perform the actual IO
      if source.local?
        upload_files destination, to_add
        remove_files destination, to_remove unless @args.keep
      else
        download_files destination, source, to_add
        remove_local_files destination, source, to_remove unless @args.keep
      end
    end

    def self.parse_params args
      # Reading the arbitrary parameters from the command line and getting
      # modifiable copies to parse
      source, destination = args
      return nil if source.nil? or destination.nil?

      # Syncing from one S3 bucket to another is currently not supported
      if remote_prefix? source and remote_prefix? destination
        raise WrongUsage.new(nil, 'Both arguments can\'t be on S3')
      end

      # C'mon, there's rsync out there
      if !remote_prefix? source and !remote_prefix? destination
        raise WrongUsage.new(nil, 'One argument must be on S3')
      end

      source, destination = process_destination source, destination
      return [Location.new(*source), Location.new(*destination)]
    end

    def self.remote_prefix?(prefix)
      # allow for dos-like things e.g. C:\ to be treated as local even with
      # colon.
      prefix.include? ':' and not prefix.match '^[A-Za-z]:[\\\\/]'
    end
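    # Illustration of what `remote_prefix?` considers remote (hypothetical
    # values, shown here only as documentation):
    #
    #   remote_prefix? 'mybucket:backup/photos'  # => true  (bucket:prefix)
    #   remote_prefix? '/home/user/photos'       # => false (no colon)
    #   remote_prefix? 'C:\Users\user\photos'    # => false (DOS drive letter)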
    def self.process_file_destination source, destination, file=""
      if not file.empty?
        sub = (remote_prefix? source) ? source.split(":")[1] : source
        file = file.gsub(/^#{sub}/, '')
      end

      # No slash on the end of source means we need to append the last src dir
      # to the dst prefix. Testing for empty isn't good enough here.. it needs
      # to be "empty apart from potentially having 'bucket:'"
      if source =~ %r{/$}
        if remote_prefix? destination and destination.end_with? ':'
          S3Sync.safe_join [destination, file]
        else
          File.join [destination, file]
        end
      else
        if remote_prefix? source
          _, name = source.split ":"
          File.join [destination, File.basename(name || ""), file]
        else
          source = /^\/?(.*)/.match(source)[1]

          # Corner case: the root of the remote path is empty, we don't want
          # to add an unnecessary slash here.
          if destination.end_with? ':'
            File.join [destination + source, file]
          else
            File.join [destination, source, file]
          end
        end
      end
    end

    def self.process_destination source, destination
      source, destination = source.dup, destination.dup

      # don't repeat slashes
      source.squeeze! '/'
      destination.squeeze! '/'

      # Making sure that local paths won't break our stuff later
      source.gsub!(/^\.\//, '')
      destination.gsub!(/^\.\//, '')

      # Parsing the final destination
      destination = process_file_destination source, destination, ""

      # here's where we find out what direction we're going
      source_is_s3 = remote_prefix? source

      # canonicalize the S3 stuff
      remote_prefix = source_is_s3 ? source : destination
      bucket, remote_prefix = remote_prefix.split ":"
      remote_prefix ||= ""

      # Just making sure we preserve the direction
      if source_is_s3
        [[remote_prefix, bucket], destination]
      else
        [source, [remote_prefix, bucket]]
      end
    end

    def read_tree_remote location
      dir = location.path
      dir += '/' if not dir.empty? and not dir.end_with?('/')

      nodes = {}
      @args.s3.buckets[location.bucket].objects.with_prefix(dir || "").to_a.each do |obj|
        # The etag comes back wrapped in quotes: obj.etag.inspect # => "\"abc...def\""
        small_comparator = lambda { obj.etag[/[a-z0-9]+/] }
        node = Node.new(location.path, obj.key, obj.content_length, small_comparator)
        nodes[node.path] = node
      end

      return nodes
    end

    def read_trees source, destination
      if source.local?
        source_tree = LocalDirectory.new(source.path).list_files
        destination_tree = read_tree_remote destination
      else
        source_tree = read_tree_remote source
        destination_tree = LocalDirectory.new(destination.path).list_files
      end

      [source_tree, destination_tree]
    end

    def upload_files remote, list
      list.each do |e|
        if @args.verbose
          puts " + #{e.full} => #{remote}#{e.path}"
        end

        unless @args.dry_run
          remote_path = "#{remote.path}#{e.path}"
          @args.s3.buckets[remote.bucket].objects[remote_path].write Pathname.new(e.full), :acl => @args.acl
        end
      end
    end

    def remove_files remote, list
      if @args.verbose
        list.each {|e| puts " - #{remote}#{e.path}" }
      end

      unless @args.dry_run
        @args.s3.buckets[remote.bucket].objects.delete_if { |obj| list.map(&:path).include? obj.key }
      end
    end

    def download_files destination, source, list
      list.each {|e|
        path = File.join destination.path, e.path

        if @args.verbose
          puts " + #{source}#{e.path} => #{path}"
        end

        unless @args.dry_run
          obj = @args.s3.buckets[source.bucket].objects[e.path]

          # Making sure this new file will have a safe shelter
          FileUtils.mkdir_p File.dirname(path)

          # In some cases the s3 object will have a trailing '/' indicating a
          # folder (this behavior was noticed when the s3 folder is created by
          # Transmit)
          if path[-1] == '/'
            FileUtils.mkdir_p path
          else
            # Downloading and saving the files
            File.open(path, 'wb') do |file|
              begin
                obj.read do |chunk|
                  file.write chunk
                end
              rescue AWS::Core::Http::NetHttpHandler::TruncatedBodyError
                $stderr.puts "WARNING: (retryable) TruncatedBodyError occurred, retrying in a second #{File.basename(path)}"
                sleep 1
                retry
              end
            end
          end
        end
      }
    end

    def remove_local_files destination, source, list
      list.each {|e|
        path = File.join destination.path, e.path

        if @args.verbose
          puts " * #{e.path} => #{path}"
        end

        unless @args.dry_run
          FileUtils.rm_rf path
        end
      }
    end
  end
end
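# A rough usage sketch (illustrative only, not part of the library). The
# command expects an options object exposing the attributes used above,
# namely `s3` (an AWS::S3 handle), `acl`, `verbose`, `dry_run`, `keep` and
# `exclude`, plus a local path and a "bucket:prefix" pair, in either order:
#
#   require 'ostruct'
#   args = OpenStruct.new(:s3 => AWS::S3.new, :acl => :private,
#                         :verbose => true, :dry_run => false,
#                         :keep => false, :exclude => nil)
#   S3Sync::SyncCommand.new(args, 'photos/', 'mybucket:backup/photos/').run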