#
# Copyright (c) 2013 RightScale Inc
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

require 'right_aws'
require 'fileutils'
require 'digest/md5'
require 'logger'

module RightDevelop
  module S3

    # Provides a Ruby OOP interface to Amazon S3.
    #
    # Note: :filters is accepted as an option by several of the storage
    # actions below and refers to an array of Regexp or wildcard-style filter
    # strings (e.g. '*.txt'). Filters are matched against file paths relative
    # to the given :subdirectory, or else from the root of the bucket (or of
    # the directory on disk).
    class Interface

      NO_SLASHES_REGEXP = /^[^\/]+$/

      DEFAULT_OPTIONS = {
        :filters               => nil,
        :subdirectory          => nil,
        :recursive             => true,
        :aws_access_key_id     => nil,
        :aws_secret_access_key => nil,
        :logger                => nil
      }.freeze

      # @option options [String] :aws_access_key_id required AWS access key id
      # @option options [String] :aws_secret_access_key required AWS secret access key
      # @option options [Logger] :logger to use or nil to log to STDOUT
      def initialize(options={})
        options = DEFAULT_OPTIONS.merge(options)
        aws_access_key_id = options[:aws_access_key_id]
        aws_secret_access_key = options[:aws_secret_access_key]
        unless aws_access_key_id && aws_secret_access_key
          raise ::ArgumentError,
                'Missing one or both mandatory options - :aws_access_key_id and :aws_secret_access_key'
        end
        @logger = options[:logger] || Logger.new(STDOUT)
        @s3 = ::RightAws::S3Interface.new(
          aws_access_key_id, aws_secret_access_key, :logger => @logger)
      end

      attr_accessor :logger

      # Lists the files in the given bucket.
      #
      # @param [String] bucket to query
      # @option options [String] :subdirectory to start from or nil
      # @option options [TrueClass|FalseClass] :recursive true if recursive (default)
      # @option options [Array] :filters for returned paths or nil or empty
      # @return [Array] list of relative file paths or empty
      def list_files(bucket, options={})
        options = DEFAULT_OPTIONS.dup.merge(options)
        prefix = normalize_subdirectory_path(options[:subdirectory])
        filters = normalize_filters(options)
        files = []
        trivial_filters = filters.select { |filter| filter.is_a?(String) }
        if trivial_filters.empty?
          @s3.incrementally_list_bucket(bucket, 'prefix' => prefix) do |response|
            incremental_files = response[:contents].map do |details|
              details[:key][(prefix.length)..-1]
            end
            files += filter_files(incremental_files, filters)
          end
        else
          trivial_filters.each do |filename|
            begin
              # use head to query file existence.
              @s3.head(bucket, "#{prefix}#{filename}")
              files << filename
            rescue RightAws::AwsError => e
              # do nothing if file not found
              raise unless '404' == e.http_code
            end
          end
        end
        return files
      end
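
      # The following is a hypothetical usage sketch for list_files
      # (illustration only; the bucket name, credentials and file layout are
      # assumptions, not part of the library):
      #
      #   s3 = ::RightDevelop::S3::Interface.new(
      #     :aws_access_key_id     => ENV['AWS_ACCESS_KEY_ID'],
      #     :aws_secret_access_key => ENV['AWS_SECRET_ACCESS_KEY'])
      #
      #   # recursive listing under a subdirectory. note that every element of
      #   # :filters must match a path, so alternatives are expressed with
      #   # semicolons inside a single filter string rather than as separate
      #   # array elements.
      #   s3.list_files('my-bucket',
      #                 :subdirectory => 'builds/latest',
      #                 :filters      => ['*.tar;*.tgz'])
      #
      #   # non-recursive lookup of a single known file name; this avoids a
      #   # full bucket listing by issuing a HEAD request instead.
      #   s3.list_files('my-bucket',
      #                 :recursive => false,
      #                 :filters   => ['manifest.json'])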

      # Downloads all files from the given bucket to the given directory.
      #
      # @param [String] bucket for download
      # @param [String] to_dir_path destination directory for downloaded files
      # @option options [String] :subdirectory to start from or nil
      # @option options [TrueClass|FalseClass] :recursive true if recursive (default)
      # @option options [Array] :filters for returned paths or nil or empty
      # @return [Fixnum] count of downloaded files
      def download_files(bucket, to_dir_path, options={})
        options = DEFAULT_OPTIONS.dup.merge(options)
        prefix = normalize_subdirectory_path(options[:subdirectory])
        files = list_files(bucket, options)
        downloaded = 0
        if files.empty?
          logger.info("No files found in \"#{bucket}/#{prefix}\"")
        else
          logger.info("Downloading #{files.count} files...")
          files.each do |path|
            key = "#{prefix}#{path}"
            to_file_path = File.join(to_dir_path, path)
            parent_path = File.dirname(to_file_path)
            FileUtils.mkdir_p(parent_path) unless File.directory?(parent_path)

            # compare the local file's MD5 against the S3 ETag (quotes and any
            # other non-hex characters stripped) to avoid re-downloading
            # identical contents.
            skip = false
            file_md5 = File.exist?(to_file_path) && Digest::MD5.hexdigest(File.read(to_file_path))
            if file_md5
              head = @s3.head(bucket, key) rescue nil
              key_md5 = head && head['etag'].gsub(/[^0-9a-fA-F]/, '')
              skip = (key_md5 == file_md5)
            end
            if skip
              logger.info("Skipping #{bucket}/#{key} (identical contents)")
            else
              logger.info("Downloading #{bucket}/#{key}")
              ::File.open(to_file_path, 'wb') do |f|
                @s3.get(bucket, key) { |chunk| f.write(chunk) }
              end
              downloaded += 1
              logger.info("Downloaded to \"#{to_file_path}\"")
            end
          end
        end
        downloaded
      end

      # Uploads all files from the given directory (ignoring any empty
      # directories) to the given bucket.
      #
      # @param [String] bucket for upload
      # @param [String] from_dir_path source directory to upload
      # @option options [String] :subdirectory to start from or nil
      # @option options [TrueClass|FalseClass] :recursive true if recursive (default)
      # @option options [Array] :filters for returned paths or nil or empty
      # @option options [String] :access ACL for uploaded files, defaults to 'public-read'
      # @return [Fixnum] count of uploaded files
      def upload_files(bucket, from_dir_path, options={})
        Dir.chdir(from_dir_path) do
          logger.info("Working in #{Dir.pwd.inspect}")
          options = DEFAULT_OPTIONS.dup.merge(options)
          prefix = normalize_subdirectory_path(options[:subdirectory])
          filters = normalize_filters(options)
          pattern = options[:recursive] ? '**/*' : '*'
          files = Dir.glob(pattern).select { |path| File.file?(path) }
          files = filter_files(files, filters)
          access = normalize_access(options)
          uploaded = 0
          files.each do |path|
            key = "#{prefix}#{path}"
            file_md5 = Digest::MD5.hexdigest(File.read(path))
            File.open(path, 'rb') do |f|
              # compare the local file's MD5 against the S3 ETag (quotes and
              # any other non-hex characters stripped) to avoid re-uploading
              # identical contents.
              head = @s3.head(bucket, key) rescue nil
              key_md5 = head && head['etag'].gsub(/[^0-9a-fA-F]/, '')
              if file_md5 == key_md5
                logger.info("Skipping #{bucket}/#{key} (identical contents)")
              else
                logger.info("Uploading to #{bucket}/#{key}")
                @s3.put(bucket, key, f, 'x-amz-acl' => access)
                uploaded += 1
              end
            end
          end
          uploaded
        end
      end
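
      # A hypothetical round-trip sketch for upload_files and download_files
      # (illustration only; the bucket name, directories and ACL are
      # assumptions, not part of the library):
      #
      #   s3 = ::RightDevelop::S3::Interface.new(
      #     :aws_access_key_id     => ENV['AWS_ACCESS_KEY_ID'],
      #     :aws_secret_access_key => ENV['AWS_SECRET_ACCESS_KEY'])
      #
      #   # upload a local tree under a key prefix; files whose MD5 already
      #   # matches the existing S3 ETag are skipped.
      #   s3.upload_files('my-bucket', '/tmp/artifacts',
      #                   :subdirectory => 'builds/42',
      #                   :filters      => ['*.tar;*.tgz'],
      #                   :access       => 'private')
      #
      #   # mirror the same prefix back down to another directory.
      #   s3.download_files('my-bucket', '/tmp/restore',
      #                     :subdirectory => 'builds/42')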

      # Deletes all files from the given bucket.
      #
      # @param [String] bucket for delete
      # @option options [String] :subdirectory to start from or nil
      # @option options [TrueClass|FalseClass] :recursive true if recursive (default)
      # @option options [Array] :filters for files to delete or nil or empty
      # @return [Fixnum] count of deleted files
      def delete_files(bucket, options={})
        options = DEFAULT_OPTIONS.dup.merge(options)
        prefix = normalize_subdirectory_path(options[:subdirectory])
        files = list_files(bucket, options)
        if files.empty?
          logger.info("No files found in \"#{bucket}/#{prefix}\"")
        else
          logger.info("Deleting #{files.count} files...")
          files.each do |path|
            @s3.delete(bucket, "#{prefix}#{path}")
            logger.info("Deleted \"#{bucket}/#{prefix}#{path}\"")
          end
        end
        return files.size
      end

      protected

      # Normalizes a relative file path for use with S3.
      #
      # @param [String] path to normalize
      # @return [String] normalized path
      def normalize_file_path(path)
        # remove leading and trailing slashes, convert backslashes to forward
        # slashes and collapse any multiple slashes to a single slash.
        return (path || '').
          gsub("\\", '/').
          gsub(/^\/+/, '').
          gsub(/\/+$/, '').
          gsub(/\/+/, '/')
      end

      # Normalizes a subdirectory path for use with S3 (a non-empty path
      # always gets a single trailing slash).
      #
      # @param [String] path
      # @return [String] normalized path
      def normalize_subdirectory_path(path)
        path = normalize_file_path(path)
        path += '/' unless path.empty?
        return path
      end

      # Normalizes storage filters from options.
      #
      # @option options [Array] :filters for returned paths or nil or empty
      # @return [Array] normalized filters (strings and/or Regexps)
      def normalize_filters(options)
        initial_filters = Array(options[:filters])
        normalized_filters = nil

        # support trivial filters as a simple string array for direct lookup
        # of one or more S3 objects (since listing entire buckets can be
        # slow). recursion always requires a listing, so that case cannot be
        # trivial.
        if !options[:recursive] && initial_filters.size == 1
          # a filter is trivial unless it contains wildcards. more than one
          # non-wildcard filename delimited by semicolons can still be trivial.
          filter = initial_filters.first
          if filter.kind_of?(String) && filter == filter.gsub('*', '').gsub('?', '')
            normalized_filters = filter.split(';').uniq
          end
        end

        unless normalized_filters
          normalized_filters = []
          normalized_filters << NO_SLASHES_REGEXP unless options[:recursive]
          initial_filters.each do |filter|
            if filter.kind_of?(String)
              # convert a wildcard-style filter string (e.g. '*.txt') to a
              # Regexp, splitting on semicolon (;) and ORing the result into
              # one anchored regular expression.
              # example: "*.tar;*.tgz;*.zip" -> /^(?:.*\.tar|.*\.tgz|.*\.zip)$/
              escaped = Regexp.escape(filter).gsub("\\*", '.*').gsub("\\?", '.').gsub(';', '|')
              filter = Regexp.compile("^(?:#{escaped})$")
            end
            normalized_filters << filter unless normalized_filters.index(filter)
          end
        end
        return normalized_filters
      end

      # Normalizes access from options (for uploading files).
      #
      # Note: access strings are AWS S3-style but can easily be mapped to any
      # bucket storage implementation which supports ACLs.
      #
      # @option options [String] :access requested ACL or nil for public-read
      # @return [String] normalized access
      def normalize_access(options)
        access = options[:access].to_s.empty? ? nil : options[:access]
        return access || 'public-read'
      end

      # Filters the given list of file paths using the given filters, if any.
      #
      # @param [Array] files to filter
      # @param [Array] filters for matching or empty
      # @return [Array] filtered files
      def filter_files(files, filters)
        return files if filters.empty?

        # select each path only if it matches all filters.
        return files.select { |path| filters.all? { |filter| filter.match(path) } }
      end

    end # Interface
  end # S3
end # RightDevelop
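
# A hypothetical cleanup sketch for delete_files (illustration only; the
# bucket name and prefix are assumptions, not part of the library):
#
#   s3 = ::RightDevelop::S3::Interface.new(
#     :aws_access_key_id     => ENV['AWS_ACCESS_KEY_ID'],
#     :aws_secret_access_key => ENV['AWS_SECRET_ACCESS_KEY'])
#
#   # delete only archives directly under the prefix (non-recursive); the
#   # wildcard filter "*.tar;*.tgz" is normalized to /^(?:.*\.tar|.*\.tgz)$/
#   # and combined with NO_SLASHES_REGEXP to exclude nested paths.
#   s3.delete_files('my-bucket',
#                   :subdirectory => 'builds/old',
#                   :recursive    => false,
#                   :filters      => ['*.tar;*.tgz'])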