require 'base64' require 'time' require 'net/http' require 'net/https' require 'openssl' require 'xml/libxml' # Module to handle S3 operations which don't require an internet connection, # i.e. data validation and request-building operations; # also holds all the constants relating to S3. # # Parts of this code are heavily based on Amazon's code. Here's their license: # # This software code is made available "AS IS" without warranties of any # kind. You may copy, display, modify and redistribute the software # code either by itself or as incorporated into your code; provided that # you do not remove any proprietary notices. Your use of this software # code is at your own risk and you waive any claim against Amazon # Digital Services, Inc. or its affiliates with respect to your use of # this software code. (c) 2006 Amazon Digital Services, Inc. or its # affiliates. module S33r HOST = 's3.amazonaws.com' PORT = 443 NON_SSL_PORT = 80 METADATA_PREFIX = 'x-amz-meta-' # Size of each chunk (in bytes) to be sent per request when putting files. DEFAULT_CHUNK_SIZE = 1048576 AWS_HEADER_PREFIX = 'x-amz-' AWS_AUTH_HEADER_VALUE = "AWS %s:%s" INTERESTING_HEADERS = ['content-md5', 'content-type', 'date'] # Headers which must be included with every request to S3. REQUIRED_HEADERS = ['Content-Type', 'Date'] CANNED_ACLS = ['private', 'public-read', 'public-read-write', 'authenticated-read'] # HTTP methods which S3 will respond to. METHOD_VERBS = ['GET', 'PUT', 'HEAD', 'POST', 'DELETE'] # Maximum number which can be passed in max-keys parameter when GETting bucket list. BUCKET_LIST_MAX_MAX_KEYS = 1000 # Default number of seconds an authenticated URL will last for (15 minutes). DEFAULT_EXPIRY_SECS = 60 * 15 # Build canonical string for signing; # modified (slightly) from the Amazon sample code. 
# Build the canonical string which gets signed for request authentication.
#
# method:: HTTP verb, e.g. 'GET'
# path:: request path, possibly carrying a querystring
# headers:: hash of request headers
# expires:: optional expiry (seconds since epoch) for query-string auth;
#           when supplied it stands in for the 'date' header in the signature
def generate_canonical_string(method, path, headers={}, expires=nil)
  # Keep only headers which take part in signing: the standard trio plus
  # anything carrying the Amazon prefix. Names are compared lowercased.
  signable = headers.each_with_object({}) do |(name, value), acc|
    key = name.downcase
    acc[key] = value if INTERESTING_HEADERS.include?(key) || key.start_with?(AWS_HEADER_PREFIX)
  end

  # These two fields always appear, as empty strings when absent.
  signable['content-type'] ||= ''
  signable['content-md5'] ||= ''

  # If you're using expires for query string auth, then it trumps date.
  signable['date'] = expires unless expires.nil?

  canonical = "#{method}\n"
  signable.sort_by { |key, _| key }.each do |key, value|
    canonical << (key.start_with?(AWS_HEADER_PREFIX) ? "#{key}:#{value}\n" : "#{value}\n")
  end

  # Drop everything after the question mark...
  canonical << path.gsub(/\?.*$/, '')

  # ...unless there is an acl or torrent parameter (signed sub-resources).
  if path =~ /[&?]acl($|&|=)/
    canonical << '?acl'
  elsif path =~ /[&?]torrent($|&|=)/
    canonical << '?torrent'
  end

  canonical
end

# Get the value for the AWS authentication header.
#
# Raises S33rException::MethodNotAvailable for unsupported HTTP verbs, and
# S33rException::MissingRequiredHeaders when a required header is absent.
def generate_auth_header_value(method, path, headers, aws_access_key, aws_secret_access_key)
  unless METHOD_VERBS.include?(method)
    raise S33rException::MethodNotAvailable, "Method %s not available" % method
  end

  # Check the headers needed for authentication have been set.
  missing_headers = REQUIRED_HEADERS - headers.keys
  unless missing_headers.empty?
    raise S33rException::MissingRequiredHeaders,
      "Headers required for AWS auth value are missing: " + missing_headers.join(', ')
  end

  # Sign the canonical form of the request.
  canonical_string = generate_canonical_string(method, path, headers)
  signature = generate_signature(aws_secret_access_key, canonical_string)
  AWS_AUTH_HEADER_VALUE % [aws_access_key, signature]
end

# Encode the given string with the aws_secret_access_key, by taking the
# hmac sha1 sum, and then base64 encoding it.
# Compute the request signature: the Base64-encoded HMAC-SHA1 of +str+
# keyed with the AWS secret key (encode64's trailing newline is stripped).
def generate_signature(aws_secret_access_key, str)
  # OpenSSL::Digest::Digest is deprecated and removed in recent OpenSSL gem
  # releases; OpenSSL::Digest is the supported spelling.
  digest = OpenSSL::HMAC.digest(OpenSSL::Digest.new('SHA1'), aws_secret_access_key, str)
  Base64.encode64(digest).strip
end

# Build the headers required with every S3 request (Date and Content-Type);
# options hash can contain extra header settings, as follows:
# :date and :content_type are required headers, and set to defaults if not supplied.
# Returns the (mutated) headers hash.
def add_default_headers(headers, options={})
  # Only fill in the required headers the caller hasn't already set.
  missing_headers = REQUIRED_HEADERS - headers.keys
  if missing_headers.include?('Content-Type')
    headers['Content-Type'] = options[:content_type] || ''
  end
  if missing_headers.include?('Date')
    date = options[:date] || Time.now
    headers['Date'] = date.httpdate
  end
  headers
end

# Add metadata headers, correctly prefixing them first.
# Returns headers with the metadata headers appended.
def metadata_headers(headers, metadata={})
  # Iterating an empty hash is a no-op, so no emptiness guard is needed.
  metadata.each { |key, value| headers[METADATA_PREFIX + key] = value }
  headers
end

# Add a canned ACL setter header.
# Raises S33rException::UnsupportedCannedACL if canned_acl is not in CANNED_ACLS.
def canned_acl_header(canned_acl, headers={})
  unless canned_acl.nil?
    unless CANNED_ACLS.include?(canned_acl)
      raise S33rException::UnsupportedCannedACL, "The canned ACL #{canned_acl} is not supported"
    end
    headers[AWS_HEADER_PREFIX + 'acl'] = canned_acl
  end
  headers
end

# Guess a file's mime type.
# If the mime_type for a file cannot be guessed, "text/plain" is used.
# NOTE(review): relies on the mime-types gem (MIME::Types), which is not
# required at the top of this file -- confirm it is loaded by a caller.
def guess_mime_type(file_name)
  mime_type = MIME::Types.type_for(file_name)[0]
  mime_type ||= MIME::Types['text/plain'][0]
  mime_type
end

# Ensure that a bucket_name is well-formed (no leading or trailing slash).
# Raises S33rException::MalformedBucketName on a bad name; returns true
# otherwise. (The original fell through and returned nil, so a *valid*
# name read as falsy despite the predicate-style name.)
def bucket_name_valid?(bucket_name)
  if bucket_name.start_with?('/') || bucket_name.end_with?('/')
    raise S33rException::MalformedBucketName, "Bucket name cannot have a leading or trailing slash"
  end
  true
end

# Convert a hash of name/value pairs to querystring variables.
# Name for a variable can be a string or symbol.
def generate_querystring(pairs={}) str = '' if pairs.size > 0 str += "?" + pairs.map { |key, value| "#{key}=#{CGI::escape(value.to_s)}" }.join('&') end str end # Build URLs from fragments. # Does similar job to File.join but puts forward slash between arguments # (only if it's not already there). def url_join(*args) url_start = '' url_end = args.join('/') # string index where the scheme of the URL (xxxx://) ends scheme_ends_at = (url_end =~ /:\/\//) unless scheme_ends_at.nil? scheme_ends_at = scheme_ends_at + 1 url_start = url_end[0..scheme_ends_at] url_end = url_end[(scheme_ends_at + 1)..-1] end # replace any multiple forward slashes (except those in the scheme) url_end = url_end.gsub(/\/{2,}/, '/') url_start + url_end end # The public URL for this key (which only works if public-read ACL is set). def s3_public_url(bucket_name, resource_key) "http://" + HOST + '/' + bucket_name + '/' + resource_key end # Generate a get-able URL for an S3 resource key which passes authentication in querystring. # int expires: when the URL expires (seconds since the epoch) def s3_authenticated_url(aws_access_key, aws_secret_access_key, bucket_name, resource_key, expires) path = '/' + bucket_name + '/' + resource_key canonical_string = generate_canonical_string('GET', path, {}, expires) signature = generate_signature(aws_secret_access_key, canonical_string) querystring = generate_querystring({ 'Signature' => signature, 'Expires' => expires, 'AWSAccessKeyId' => aws_access_key }) return s3_public_url(bucket_name, resource_key) + querystring end # Turn keys in a hash hsh into symbols. # Returns a hash with 'symbolised' keys. def S33r.keys_to_symbols(hsh) symbolised = hsh.inject({}) do |symbolised, key_value| symbolised.merge({key_value[0].to_sym => key_value[1]}) end symbolised end end