bin/s3sync in s3sync-0.3.4 vs bin/s3sync in s3sync-1.2.5
- old
+ new
@@ -1,76 +1,735 @@
-#!/usr/bin/env ruby
+#! /System/Library/Frameworks/Ruby.framework/Versions/1.8/usr/bin/ruby
+# This software code is made available "AS IS" without warranties of any
+# kind. You may copy, display, modify and redistribute the software
+# code either by itself or as incorporated into your code; provided that
+# you do not remove any proprietary notices. Your use of this software
+# code is at your own risk and you waive any claim against the author
+# with respect to your use of this software code.
+# (c) 2007 s3sync.net
#
-# s3sync - Tool belt for managing your S3 buckets
-#
-# The MIT License (MIT)
-#
-# Copyright (c) 2013 Lincoln de Sousa <lincoln@clarete.li>
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to deal
-# in the Software without restriction, including without limitation the rights
-# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-# copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in
-# all copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-# THE SOFTWARE.
-$:.unshift(File.dirname(__FILE__) + '/../lib') unless $:.include?(File.dirname(__FILE__) + '/../lib')
+module S3sync
-require "s3sync/exceptions"
-require "s3sync/config"
-require "s3sync/cli"
+ $S3SYNC_MIME_TYPES_FILE = (ENV["S3SYNC_MIME_TYPES_FILE"] or '/etc/mime.types')
+
+ $S3SYNC_VERSION = '1.2.5'
-conf = S3Sync::Config.new
+ # always look "here" for include files (thanks aktxyz)
+ $LOAD_PATH << File.expand_path(File.dirname(__FILE__))
+
+ require 'getoptlong'
+ #require 'generator' # http://www.ruby-doc.org/stdlib/libdoc/generator/rdoc/classes/Generator.html
+ require 'thread_generator' # memory doesn't leak with this one, at least nothing near as bad
+ require 'md5'
+ require 'tempfile'
+ require 's3try'
+
+ # after other mods, so we don't overwrite yaml vals with defaults
+ require 's3config'
+ include S3Config
+
+ $S3syncDirString = '{E40327BF-517A-46e8-A6C3-AF51BC263F59}'
+ $S3syncDirTag = 'd66759af42f282e1ba19144df2d405d0'
+ $S3syncDirFile = Tempfile.new("s3sync")
+ $S3syncDirFile.puts $S3syncDirString
+ $S3syncDirFile.close # not final; we need this file again to 'put' directory nodes
+
+ if $S3SYNC_MIME_TYPES_FILE and FileTest.exist?($S3SYNC_MIME_TYPES_FILE)
+ File.open($S3SYNC_MIME_TYPES_FILE, 'r') do |f|
+ $mimeTypes = {}
+ f.each_line do |l|
+ if l =~ /^(\w\S+)\s+(\S.*)$/
+ type = $1
+ exts = $2.split
+ exts.each do |e|
+ $mimeTypes[e.to_s] = type.to_s
+ end
+ end
+ end
+ end
+ end
+
+ def S3sync.main
+ # ---------- OPTIONS PROCESSING ---------- #
+
+ $S3syncOptions = Hash.new
+ optionsParser = GetoptLong.new(
+ [ '--help', '-h', GetoptLong::NO_ARGUMENT ],
+ [ '--ssl', '-s', GetoptLong::NO_ARGUMENT ],
+ [ '--recursive','-r', GetoptLong::NO_ARGUMENT ],
+ [ '--public-read','-p', GetoptLong::NO_ARGUMENT ],
+ [ '--delete', GetoptLong::NO_ARGUMENT ],
+ [ '--verbose', '-v', GetoptLong::NO_ARGUMENT ],
+ [ '--dryrun', '-n', GetoptLong::NO_ARGUMENT ],
+ [ '--debug', '-d', GetoptLong::NO_ARGUMENT ],
+ [ '--memory', '-m', GetoptLong::NO_ARGUMENT ],
+ [ '--progress', GetoptLong::NO_ARGUMENT ],
+ [ '--expires', GetoptLong::REQUIRED_ARGUMENT ],
+ [ '--cache-control', GetoptLong::REQUIRED_ARGUMENT ],
+ [ '--exclude', GetoptLong::REQUIRED_ARGUMENT ],
+ [ '--make-dirs', GetoptLong::NO_ARGUMENT ],
+ [ '--no-md5', GetoptLong::NO_ARGUMENT ]
+ )
+
+ def S3sync.usage(message = nil)
+ $stderr.puts message if message
+ name = $0.split('/').last
+ $stderr.puts <<"ENDUSAGE"
+#{name} [options] <source> <destination>\t\tversion #{$S3SYNC_VERSION}
+ --help -h --verbose -v --dryrun -n
+ --ssl -s --recursive -r --delete
+ --public-read -p --expires="<exp>" --cache-control="<cc>"
+ --exclude="<regexp>" --progress --debug -d
+ --make-dirs --no-md5
+One of <source> or <destination> must be of S3 format, the other a local path.
+Reminders:
+* An S3 formatted item with bucket 'mybucket' and prefix 'mypre' looks like:
+ mybucket:mypre/some/key/name
+* Local paths should always use forward slashes '/' even on Windows
+* Whether you use a trailing slash on the source path makes a difference.
+* For examples see README.
+ENDUSAGE
+ exit
+ end #usage
+
+ begin
+ optionsParser.each {|opt, arg| $S3syncOptions[opt] = (arg || true)}
+ rescue StandardError
+ usage # the parser already printed an error message
+ end
+ usage if $S3syncOptions['--help']
+ $S3syncOptions['--verbose'] = true if $S3syncOptions['--dryrun'] or $S3syncOptions['--debug'] or $S3syncOptions['--progress']
+ $S3syncOptions['--ssl'] = true if $S3syncOptions['--ssl'] # change from "" to true to appease s3 port chooser
-# Time to load config and see if we've got everything we need to cook our salad
-begin
- conf.read
-rescue S3Sync::NoConfigFound => exc
- # We can't proceed without having those two vars set
- $stderr.puts "You didn't set up the following environment variables:"
- $stderr.puts
- exc.missing_vars.each {|var| $stderr.puts " * #{var}"}
- $stderr.puts
+
+ # ---------- CONNECT ---------- #
+ S3sync::s3trySetup
- $stderr.puts "I tried to load a config file from the following paths:"
- $stderr.puts
- exc.paths_checked.each {|path| $stderr.puts " * #{path}"}
- $stderr.puts
+ # ---------- PREFIX PROCESSING ---------- #
+
+ def S3sync.s3Prefix?(pre)
+ # allow for dos-like things e.g. C:\ to be treated as local even with colon
+ pre.include?(':') and not pre.match('^[A-Za-z]:[\\\\/]')
+ end
+ sourcePrefix, destinationPrefix = ARGV
+ usage("You didn't set up your environment variables; see README.txt") if not($AWS_ACCESS_KEY_ID and $AWS_SECRET_ACCESS_KEY)
+ usage('Need a source and a destination') if sourcePrefix == nil or destinationPrefix == nil
+ usage('Both arguments can\'t be on S3') if s3Prefix?(sourcePrefix) and s3Prefix?(destinationPrefix)
+ usage('One argument must be on S3') if !s3Prefix?(sourcePrefix) and !s3Prefix?(destinationPrefix)
- $stderr.puts "You could try to set the `S3SYNC_PATH' environment variable"
- $stderr.puts "pointing to a file to be loaded as your config file or just"
- $stderr.puts "export those variables to your environment like this:"
- $stderr.puts
- exc.missing_vars.each {|var|
- $stderr.puts " $ export #{var}=<value-provided-by-amazon>"
- }
- $stderr.puts
- $stderr.puts "Learn how to do that here: https://github.com/clarete/s3sync"
- exit
-end
+ # so we can modify them
+ sourcePrefix, destinationPrefix = sourcePrefix.dup, destinationPrefix.dup
-# Step aside, the star of this show is here. Let's try to create the
-# environment to run the requested command. And feed the user back if
-# information needed was not enough
-begin
- S3Sync::CLI::run conf
-rescue S3Sync::FailureFeedback => exc
- $stderr.puts exc.message
- exit 1
-rescue S3Sync::WrongUsage => exc
- $stderr.puts "Error:\n #{exc.msg}\n" if exc.msg
- exit exc.error_code
-rescue Interrupt
- $stderr.puts "Interrupted"
- exit 1
+ # handle trailing slash for source properly
+ if(sourcePrefix !~ %r{/$})
+ # no slash on end of source means we need to append the last src dir to dst prefix
+ # testing for empty isn't good enough here.. needs to be "empty apart from potentially having 'bucket:'"
+ slash = (destinationPrefix.empty? or destinationPrefix.match(%r{:$}))? "" : "/"
+ # not good enough.. sometimes this coughs up the bucket as a prefix destinationPrefix.replace(destinationPrefix + slash + sourcePrefix.split(/(?:\/|:)/).last)
+ # take everything at the end after a slash or colon
+ destinationPrefix.replace(destinationPrefix + slash + %r{([^/:]*)$}.match(sourcePrefix)[1])
+ end
+ # no trailing slash on dest, ever.
+ destinationPrefix.sub!(%r{/$}, "")
+
+ # don't repeat slashes
+ sourcePrefix.squeeze!('/')
+ destinationPrefix.squeeze!('/')
+
+ # here's where we find out what direction we're going
+ sourceIsS3 = s3Prefix?(sourcePrefix)
+ # alias these variables to the other strings (in ruby = does not make copies of strings)
+ s3Prefix = sourceIsS3 ? sourcePrefix : destinationPrefix
+ localPrefix = sourceIsS3 ? destinationPrefix : sourcePrefix
+
+ # canonicalize the S3 stuff
+ s3Bucket = (/^(.*?):/.match(s3Prefix))[1]
+ s3Prefix.replace((/:(.*)$/.match(s3Prefix))[1])
+ debug("s3Prefix #{s3Prefix}")
+ $S3SyncOriginalS3Prefix = s3Prefix.dup
+
+ # canonicalize the local stuff
+ # but that can kill a trailing slash, which we need to preserve long enough to know whether we mean "the dir" or "its contents"
+ # it will get re-stripped by the local generator after expressing this knowledge
+ localTrailingSlash = localPrefix.match(%r{/$})
+ localPrefix.replace(File.expand_path(localPrefix))
+ localPrefix += '/' if localTrailingSlash
+ debug("localPrefix #{localPrefix}")
+ # used for exclusion parsing
+ $S3SyncOriginalLocalPrefix = localPrefix.dup
+
+ # exclude preparation
+ # we don't want to build then throw away this regexp for each node in the universe; do it once globally
+ $S3SyncExclude = Regexp.new($S3syncOptions['--exclude']) if $S3syncOptions['--exclude']
+
+
+ # ---------- GENERATORS ---------- #
+
+
+ # a generator that will return the files/dirs of the local tree one by one
+ # sorted and decorated for easy comparison with the S3 tree
+ localTree = Generator.new do |g|
+ def S3sync.localTreeRecurse(g, prefix, path)
+ debug("localTreeRecurse #{prefix} #{path}")
+ #if $S3syncOptions['--memory']
+ # $stderr.puts "Starting local recurse"
+ # stats = ostats stats
+ #end
+ d = nil
+ begin
+ slash = prefix.empty? ? "" : "/"
+ d = Dir.new(prefix + slash + path)
+ rescue Errno::ENOENT
+ # ok the dir doesn't exist at all (this only really occurs for the root i.e. first dir)
+ return nil
+ rescue Errno::EACCES
+ # vista won't even let us touch some stuff in our own profile
+ return nil
+ end
+ # do some pre-processing
+ # the following sleight of hand is to make the recursion match the way s3 sorts
+ # take for example the directory 'foo' and the file 'foo.bar'
+ # when we encounter the dir we would want to recurse into it
+ # but S3 would just say 'period < slash' and sort 'foo.bar' between the dir node
+ # and the contents in that 'dir'
+ #
+ # so the solution is to not recurse into the directory until the point where
+ # it would come up "next" in the S3 list
+ # We have to do these hoops on the local side, because we have very little control
+ # over how S3 will return its results
+ toAdd = Array.new
+ d.each do |name|
+ slash = path.empty? ? "" : "/"
+ partialPath = path + slash + name
+ slash = prefix.empty? ? "" : "/"
+ fullPath = prefix + slash + partialPath
+ if name == "." or name == ".."
+ # skip
+ else
+ # add a dir node if appropriate
+ debug("Test #{fullPath}")
+ if ((not FileTest.symlink?(fullPath)) and FileTest.directory?(fullPath)) and $S3syncOptions['--recursive']
+ debug("Adding it as a dir node")
+ toAdd.push(name + '/') # always trail slash here for sorting purposes (removed below with rindex test)
+ end
+ end
+ end
+ dItems = d.collect + toAdd
+ d.close
+ d = toAdd = nil
+ dItems.sort! #aws says we will get alpha sorted results but ruby doesn't
+ dItems.each do |name|
+ isDirNode = false
+ if name.rindex('/') == name.length-1
+ name = name.slice(0...name.length-1)
+ isDirNode = true
+ debug("#{name} is a dir node")
+ end
+ slash = path.empty? ? "" : "/"
+ partialPath = path + slash + name
+ slash = prefix.empty? ? "" : "/"
+ fullPath = prefix + slash + partialPath
+ excludePath = fullPath.slice($S3SyncOriginalLocalPrefix.length...fullPath.length)
+ if name == "." or name == ".."
+ # skip
+ elsif $S3SyncExclude and $S3SyncExclude.match(excludePath)
+ debug("skipping local item #{excludePath} because of --exclude")
+ elsif isDirNode
+ localTreeRecurse(g, prefix, partialPath)
+ else
+ # a normal looking node we should try to process
+ debug("local item #{fullPath}")
+ g.yield(LocalNode.new(prefix, partialPath))
+ end
+ end
+ #if $S3syncOptions['--memory']
+ # $stderr.puts "Ending local recurse"
+ # stats = ostats stats
+ #end
+ end
+ # a bit of a special case for local, since "foo/" and "foo" are essentially treated the same by file systems
+ # so we need to think harder about what the user really meant in the command line.
+ localPrefixTrim = localPrefix
+ if localPrefix !~ %r{/$}
+ # no trailing slash, so yield the root itself first, then recurse if appropriate
+ # gork this is still not quite good enough.. if local is the dest then we don't know whether s3 will have a root dir node yielded a priori, so we can't know whether to do this. only matters for --erase though
+ g.yield(LocalNode.new(localPrefixTrim, "")) # technically we should check this for exclusion, but excluding the root node is kind of senseless.. and that would be a pain to set up here
+ localTreeRecurse(g, localPrefixTrim, "") if $S3syncOptions['--recursive']
+ else
+ # trailing slash, so ignore the root itself, and just go into the first level
+ localPrefixTrim.sub!(%r{/$}, "") # strip the slash because of how we do local node slash accounting in the recurse above
+ localTreeRecurse(g, localPrefixTrim, "")
+ end
+ end
+
+ # a generator that will return the nodes in the S3 tree one by one
+ # sorted and decorated for easy comparison with the local tree
+ s3Tree = Generator.new do |g|
+ def S3sync.s3TreeRecurse(g, bucket, prefix, path)
+ if $S3syncOptions['--memory']
+ $stderr.puts "Starting S3 recurse"
+ GC.start
+ stats = ostats stats
+ end
+ $stderr.puts "s3TreeRecurse #{bucket} #{prefix} #{path}" if $S3syncOptions['--debug']
+ nextPage = true
+ marker = ''
+ while nextPage do
+ fullPrefix = prefix + path
+ debug("nextPage: #{marker}") if marker != ''
+ options = {}
+ options['prefix'] = fullPrefix # start at the right depth
+ options['delimiter'] = '/' # only one dir at a time please
+ options['max-keys'] = '200' # use manageable chunks
+ options['marker'] = marker unless marker == ''
+ d = S3sync.S3try(:list_bucket, bucket, options)
+ $stderr.puts "S3 ERROR: #{d.http_response}" unless d.http_response.is_a? Net::HTTPSuccess
+ # the 'directories' and leaf nodes are in two separate collections
+ # because a dir will never have the same name as a node, we can just shove them together and sort
+ # it's important to evaluate them alphabetically for efficient comparison to the local tree
+ tItems = d.entries + d.common_prefix_entries
+ tItems.sort! do |a,b|
+ aName = a.respond_to?('key') ? a.key : a.prefix
+ bName = b.respond_to?('key') ? b.key : b.prefix
+ # the full path will be returned, efficient to ignore the part we know will be in common
+ aName.slice(fullPrefix.length..aName.length) <=> bName.slice(fullPrefix.length..bName.length)
+ end
+ # get rid of the big s3 objects asap, just save light-weight nodes and strings
+ items = tItems.collect do |item|
+ if item.respond_to?('key')
+ key = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.key).join
+ Node.new(key, item.size, item.etag, item.last_modified)
+ else
+ Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", item.prefix).join
+ end
+ end
+ nextPage = d.properties.is_truncated
+ marker = (d.properties.next_marker)? d.properties.next_marker : ((d.entries.length > 0)? d.entries.last.key : '')
+ # get this into native char set (because when we feed it back to s3 that's what it will expect)
+ marker = Iconv.iconv($S3SYNC_NATIVE_CHARSET, "UTF-8", marker).join
+ tItems = nil
+ d = nil # get rid of this before recursing; it's big
+ item = nil
+ GC.start # not sure but I think yielding before doing this is causing evil closure bloat
+ items.each do |item|
+ if not (item.kind_of? String)
+ # this is an item
+ excludePath = item.name.slice($S3SyncOriginalS3Prefix.length...item.name.length)
+ if $S3SyncExclude and $S3SyncExclude.match(excludePath)
+ debug("skipping S3 item #{excludePath} due to --exclude")
+ else
+ debug("S3 item #{item.name}")
+ g.yield(S3Node.new(bucket, prefix, item))
+ end
+ else
+ # it's a prefix (i.e. there are sub keys)
+ partialPath = item.slice(prefix.length..item.length) # will have trailing slash
+ excludePath = item.slice($S3SyncOriginalS3Prefix.length...item.length)
+ # recurse
+ if $S3SyncExclude and $S3SyncExclude.match(excludePath)
+ debug("skipping prefix #{excludePath} due to --exclude")
+ else
+ debug("prefix found: #{partialPath}")
+ s3TreeRecurse(g, bucket, prefix, partialPath) if $S3syncOptions['--recursive']
+ end
+ end
+ end
+ items = nil
+ end # of while nextPage
+ if $S3syncOptions['--memory']
+ $stderr.puts "Ending S3 recurse"
+ GC.start
+ stats = ostats stats
+ end
+ end
+ # this will yield the root node first and then recurse
+ s3TreeRecurse(g, s3Bucket, s3Prefix, "")
+
+ end
+
+ # alias the tree objects so we don't care below which direction the transfer is going
+ if sourceIsS3
+ sourceTree, destinationTree = s3Tree, localTree
+ else
+ sourceTree, destinationTree = localTree, s3Tree
+ end
+
+
+ # ---------- COMPARATOR ---------- #
+
+ # run the comparison engine and act according to what we find for each check
+ nodesToDelete = Array.new # a stack. have to delete in reverse order of normal create/update processing
+
+ sourceNode = sourceTree.next? ? sourceTree.next : nil
+ destinationNode = destinationTree.next? ? destinationTree.next : nil
+ while sourceNode or destinationNode do
+ debug("source: #{sourceNode.name}") if sourceNode
+ debug("dest: #{destinationNode.name}") if destinationNode
+ if (!destinationNode) or (sourceNode and (sourceNode.name < destinationNode.name))
+ dNode =
+ if sourceNode.kind_of? LocalNode
+ S3Node.new(s3Bucket, s3Prefix, sourceNode.name)
+ else
+ LocalNode.new(localPrefix, sourceNode.name)
+ end
+ puts "Create node #{sourceNode.name}" if $S3syncOptions['--verbose']
+ dNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
+ sourceNode = sourceTree.next? ? sourceTree.next : nil
+ elsif (!sourceNode) or (destinationNode and (sourceNode.name > destinationNode.name))
+ $stderr.puts "Source does not have #{destinationNode.name}" if $S3syncOptions['--debug']
+ if $S3syncOptions['--delete']
+ if destinationNode.directory?
+ # have to wait
+ nodesToDelete.push(destinationNode)
+ else
+ puts "Remove node #{destinationNode.name}" if $S3syncOptions['--verbose']
+ destinationNode.delete unless $S3syncOptions['--dryrun']
+ end
+ end
+ destinationNode = destinationTree.next? ? destinationTree.next : nil
+ elsif sourceNode.name == destinationNode.name
+ if (sourceNode.size != destinationNode.size) or (($S3syncOptions['--no-md5'])? (sourceNode.date > destinationNode.date) : (sourceNode.tag != destinationNode.tag))
+ puts "Update node #{sourceNode.name}" if $S3syncOptions['--verbose']
+ destinationNode.updateFrom(sourceNode) unless $S3syncOptions['--dryrun']
+ elsif $S3syncOptions['--debug']
+ $stderr.puts "Node #{sourceNode.name} unchanged"
+ end
+ sourceNode = sourceTree.next? ? sourceTree.next : nil
+ destinationNode = destinationTree.next? ? destinationTree.next : nil
+ end
+ end
+
+ # get rid of the (now empty, except for other directories) directories
+ nodesToDelete.reverse_each do |node|
+ puts "Remove node #{node.name}" if $S3syncOptions['--verbose']
+ node.delete unless $S3syncOptions['--dryrun']
+ end
+
+ end #main
+
+
+
+ # ---------- NODE ---------- #
+
+ class Node
+ attr_reader :name
+ attr_reader :size
+ attr_reader :tag
+ attr_reader :date
+ def initialize(name='', size = 0, tag = '', date = Time.now.utc)
+ @name = name
+ @size = size
+ @tag = tag
+ @date = date
+ end
+ def directory?()
+ @tag == $S3syncDirTag and @size == $S3syncDirString.length
+ end
+ end
+
+ # ---------- S3Node ---------- #
+
+ class S3Node < Node
+ @path = nil
+ @bucket = nil
+ @result = nil
+ def initialize(bucket, prefix, itemOrName)
+ @bucket = bucket
+ if itemOrName.kind_of? String
+ @name = itemOrName
+ @name.sub!(%r{/$}, "") # don't create directories with a slash on the end
+ #6/2007. the prefix can be filled but the name empty, in the case of s3sync -r somedir somebucket:
+ if (not prefix.empty? and @name.empty?)
+ @name = prefix
+ itemOrName = prefix
+ prefix = ""
+ end
+ slash = prefix.empty? ? "" : "/"
+ @path = prefix + slash + itemOrName
+ else
+ @name = (itemOrName.name.slice((prefix.length)..itemOrName.name.length) or '')
+ # depending whether the prefix is / tailed, the name might need trimming
+ @name.sub!(%r{^/},"") # get rid of leading slash in name if there (from above simplistic split)
+ @name.sub!(%r{/$}, "") # don't create directories with a slash on the end
+ @path = itemOrName.name
+ @path.sub!(%r{/$}, "") # don't create directories with a slash on the end
+ @size = itemOrName.size
+ @tag = itemOrName.tag.gsub(/"/,'')
+ @date = Time.xmlschema(itemOrName.date)
+ end
+ debug("s3 node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag} Date:#{@date}")
+ end
+ # get this item from s3 into the provided stream
+ # S3 pushes to the local item, due to how http streaming is implemented
+ def to_stream(s)
+ @result = S3sync.S3try(:get_stream, @bucket, @path, {}, s)
+ end
+ def symlink?()
+ unless @result
+ @result = S3sync.S3try(:head, @bucket, @path)
+ end
+ debug("symlink value is: #{@result.object.metadata['symlink']}")
+ @result.object.metadata['symlink'] == 'true'
+ end
+ def owner
+ unless @result
+ @result = S3sync.S3try(:head, @bucket, @path)
+ end
+ debug("Owner of this s3 node is #{@result.object.metadata['owner']}")
+ @result.object.metadata['owner'].to_i # if not there, will be nil => 0 which == root so good default
+ end
+ def group
+ unless @result
+ @result = S3sync.S3try(:head, @bucket, @path)
+ end
+ @result.object.metadata['group'].to_i # 0 default ok
+ end
+ def permissions
+ g = @result.object.metadata['permissions']
+ g ? g.to_i : 600 # default to owner only
+ end
+ def updateFrom(fromNode)
+ if fromNode.respond_to?(:stream)
+ meta = Hash.new
+ meta['owner'] = fromNode.owner.to_s
+ meta['group'] = fromNode.group.to_s
+ meta['permissions'] = fromNode.permissions.to_s
+ meta['symlink'] = 'true' if fromNode.symlink?
+ begin
+ theStream = fromNode.stream
+ theStream = ProgressStream.new(theStream, fromNode.size) if $S3syncOptions['--progress']
+
+ s3o = S3::S3Object.new(theStream, meta)
+ debug(@path)
+ headers = {'Content-Length' => (fromNode.size.respond_to?(:nonzero?) ? fromNode.size.to_s : '0')}
+ headers['x-amz-acl'] = 'public-read' if $S3syncOptions['--public-read']
+ headers['Expires'] = $S3syncOptions['--expires'] if $S3syncOptions['--expires']
+ headers['Cache-Control'] = $S3syncOptions['--cache-control'] if $S3syncOptions['--cache-control']
+ fType = @path.split('.').last
+ debug("File extension: #{fType}")
+ if defined?($mimeTypes) and fType != '' and (mType = $mimeTypes[fType]) and mType != ''
+ debug("Mime type: #{mType}")
+ headers['Content-Type'] = mType
+ end
+ @result = S3sync.S3try(:put, @bucket, @path, s3o, headers)
+ theStream.close if (theStream and not theStream.closed?)
+ rescue NoMethodError
+ # when --progress is used and we can't get the stream object, it doesn't report as null
+ # so the above .closed? test will break
+ $stderr.puts "Skipping #{@path}: " + $!
+ rescue SystemCallError
+ theStream.close if (theStream and not theStream.closed?)
+ $stderr.puts "Skipping #{@path}: " + $!
+ end
+ else
+ raise "Node provided as update source doesn't support :stream"
+ end
+ end
+ def delete
+ @result = S3sync.S3try(:delete, @bucket, @path)
+ end
+ end
+
+ # ---------- LocalNode ---------- #
+
+ class LocalNode < Node
+ @path = nil
+ def initialize(prefix, partialPath)
+ slash = prefix.empty? ? "" : "/"
+ @path = prefix + slash + partialPath
+ # slash isn't at the front of this any more @name = (partialPath.slice(1..partialPath.length) or '')
+ @name = partialPath or ''
+ if FileTest.symlink?(@path)
+ # this could use the 'file' case below, but why create an extra temp file
+ linkData = File.readlink(@path)
+ $stderr.puts "link to: #{linkData}" if $S3syncOptions['--debug']
+ @size = linkData.length
+ unless $S3syncOptions['--no-md5']
+ md5 = Digest::MD5.new()
+ md5 << linkData
+ @tag = md5.hexdigest
+ end
+ @date = File.lstat(@path).mtime.utc
+ elsif FileTest.file?(@path)
+ @size = FileTest.size(@path)
+ data = nil
+ begin
+ unless $S3syncOptions['--no-md5']
+ data = self.stream
+ md5 = Digest::MD5.new()
+ while !data.eof?
+ md5 << data.read(2048) # stream so it's not taking all memory
+ end
+ data.close
+ @tag = md5.hexdigest
+ end
+ @date = File.stat(@path).mtime.utc
+ rescue SystemCallError
+ # well we're not going to have an md5 that's for sure
+ @tag = nil
+ end
+ elsif FileTest.directory?(@path)
+ # all s3 directories are dummy nodes contain the same directory string
+ # so for easy comparison, set our size and tag thusly
+ @size = $S3syncDirString.length
+ @tag = $S3syncDirTag
+ @date = File.stat(@path).mtime.utc
+ end
+ debug("local node object init. Name:#{@name} Path:#{@path} Size:#{@size} Tag:#{@tag} Date:#{@date}")
+ end
+ # return a stream that will read the contents of the local item
+ # local gets pulled by the S3Node update fn, due to how http streaming is implemented
+ def stream
+ begin
+ # 1.0.8 switch order of these tests because a symlinked file will say yes to 'file?'
+ if FileTest.symlink?(@path) or FileTest.directory?(@path)
+ tf = Tempfile.new('s3sync')
+ if FileTest.symlink?(@path)
+ tf.printf('%s', File.readlink(@path))
+ elsif FileTest.directory?(@path)
+ tf.printf('%s', $S3syncDirString)
+ end
+ tf.close
+ tf.open
+ tf
+ elsif FileTest.file?(@path)
+ File.open(@path, 'rb')
+ end
+ rescue SystemCallError
+ $stderr.puts "Could not read #{@path}: #{$!}"
+ raise
+ end
+ end
+ def stat
+ FileTest.symlink?(@path) ? File.lstat(@path) : File.stat(@path)
+ end
+ def exist?
+ FileTest.exist?(@path) or FileTest.symlink?(@path)
+ end
+ def owner
+ self.exist? ? self.stat().uid : 0
+ end
+ def group
+ self.exist? ? self.stat().gid : 0
+ end
+ def permissions
+ self.exist? ? self.stat().mode : 600
+ end
+ def updateFrom(fromNode)
+ if fromNode.respond_to?(:to_stream)
+ fName = @path + '.s3syncTemp'
+ # handle the case where the user wants us to create dirs that don't exist in S3
+ if $S3syncOptions['--make-dirs']
+ # ensure target's path exists
+ dirs = @path.split('/')
+ # but the last one is a file name
+ dirs.pop()
+ current = ''
+ dirs.each do |dir|
+ current << dir << '/'
+ begin
+ Dir.mkdir(current) unless FileTest.exist?(current)
+ rescue SystemCallError
+ $stderr.puts "Could not mkdir #{current}: #{$!}"
+ end
+ end
+ end
+ unless fromNode.directory?
+ f = File.open(fName, 'wb')
+ f = ProgressStream.new(f, fromNode.size) if $S3syncOptions['--progress']
+
+ fromNode.to_stream(f)
+ f.close
+ end
+ # get original item out of the way
+ File.unlink(@path) if File.exist?(@path)
+ if fromNode.symlink?
+ linkTo = ''
+ File.open(fName, 'rb'){|f| linkTo = f.read}
+ debug("#{@path} will be a symlink to #{linkTo}")
+ begin
+ File.symlink(linkTo, @path)
+ rescue NotImplementedError
+ # windows doesn't do symlinks, for example
+ # just bail
+ File.unlink(fName) if File.exist?(fName)
+ return
+ rescue SystemCallError
+ $stderr.puts "Could not write symlink #{@path}: #{$!}"
+ end
+ elsif fromNode.directory?
+ # only get here when the dir doesn't exist. else they'd compare ==
+ debug(@path)
+ begin
+ Dir.mkdir(@path) unless FileTest.exist?(@path)
+ rescue SystemCallError
+ $stderr.puts "Could not mkdir #{@path}: #{$!}"
+ end
+
+ else
+ begin
+ File.rename(fName, @path)
+ rescue SystemCallError
+ $stderr.puts "Could not write (rename) #{@path}: #{$!}"
+ end
+
+ end
+ # clean up if the temp file is still there (as for links)
+ File.unlink(fName) if File.exist?(fName)
+
+ # update permissions
+ linkCommand = fromNode.symlink? ? 'l' : ''
+ begin
+ File.send(linkCommand + 'chown', fromNode.owner, fromNode.group, @path)
+ File.send(linkCommand + 'chmod', fromNode.permissions, @path)
+ rescue NotImplementedError
+ # no one has lchmod, but who really cares
+ rescue SystemCallError
+ $stderr.puts "Could not change owner/permissions on #{@path}: #{$!}"
+ end
+ else
+ raise "Node provided as update source doesn't support :to_stream"
+ end
+ end
+ def symlink?()
+ FileTest.symlink?(@path)
+ end
+ def delete
+ # don't try to delete the restore root dir
+ # this is a quick fix to deal with the fact that the tree recurse has to visit the root node
+ return unless @name != ''
+ return unless FileTest.exist?(@path)
+ begin
+ if FileTest.directory?(@path)
+ Dir.rmdir(@path)
+ else
+ File.unlink(@path)
+ end
+ rescue SystemCallError
+ $stderr.puts "Could not delete #{@path}: #{$!}"
+ end
+ end
+ end
+
+
+end #module
+
+def debug(str)
+ $stderr.puts str if $S3syncOptions['--debug']
end
+
+def ostats(last_stat = nil)
+ stats = Hash.new(0)
+ ObjectSpace.each_object {|o| stats[o.class] += 1}
+
+ stats.sort {|(k1,v1),(k2,v2)| v2 <=> v1}.each do |k,v|
+ $stderr.printf "%-30s %10d", k, v
+ $stderr.printf " delta %10d", (v - last_stat[k]) if last_stat
+ $stderr.puts
+ end
+
+ stats
+end
+
+# go!
+S3sync::main
\ No newline at end of file