require 'net/https'
require 'cgi'
require 'erb'
require 'yaml'
require 'mime/types'
require File.join(File.dirname(__FILE__), 's3_acl')
require File.join(File.dirname(__FILE__), 's33r_exception')
module S33r
include Net
# The client performs operations over the network,
# using the core to build request headers and content;
# only client-specific headers are managed here: other headers
# can be handled by the core.
#--
# TODO: use customisable thread pool for requests.
# TODO: timeout on requests.
#++
class Client
include S33r
# S3 keys.
attr_accessor :aws_access_key, :aws_secret_access_key
# Size of data chunk to be sent per request when putting data.
attr_accessor :chunk_size
# Headers which should be sent with every request by default (unless overridden).
attr_accessor :client_headers
# Whether client should use SSL.
attr_accessor :use_ssl
# Whether client dumps headers from requests.
attr_accessor :dump_requests
# Default log bucket location.
attr_accessor :log_bucket
# Configure either an SSL-enabled or plain HTTP client.
# (If using SSL, no verification of server certificate is performed.)
#
# +options+: hash of optional client configuration:
# * :use_ssl => false: only use plain HTTP for connections
# * :dump_requests => true: dump each request's initial line and headers to STDOUT
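#
# A minimal construction sketch (the key strings and option values here
# are placeholders):
#
#   client = S33r::Client.new('ACCESS_KEY', 'SECRET_KEY',
#     :use_ssl => false, :dump_requests => true)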
def initialize(aws_access_key, aws_secret_access_key, options={})
# use SSL unless it has been explicitly disabled
@use_ssl = (options[:use_ssl] != false)
@dump_requests = (true == options[:dump_requests])
# set default chunk size for streaming request body
@chunk_size = DEFAULT_CHUNK_SIZE
@log_bucket = options[:log_bucket]
# Amazon S3 developer keys
@aws_access_key = aws_access_key
@aws_secret_access_key = aws_secret_access_key
# headers sent with every request made by this client
@client_headers = {}
end
# Get an HTTP client instance.
#
# NB instantiation happens here so that HTTP client instances are
# only created when needed (e.g. so Client can act as an empty
# shell until list_buckets is actually called).
def get_client
if @use_ssl
client = HTTP.new(HOST, PORT)
# turn off SSL certificate verification
client.verify_mode = OpenSSL::SSL::VERIFY_NONE
client.use_ssl = true
else
client = HTTP.new(HOST, NON_SSL_PORT)
client.use_ssl = false
end
client
end
# Initialise client from YAML configuration file
# (see load_config method for details of acceptable format).
def Client.init(config_file)
aws_access_key, aws_secret_access_key, options = load_config(config_file)
Client.new(aws_access_key, aws_secret_access_key, options)
end
# Load a YAML config file for a client. The config file looks like this:
#
# :include: test/files/namedbucket_config.yml
#
# Note that the loader also runs the config file through ERB, so you can
# add dynamic blocks of ERB (Ruby) code into your files.
#
# The +options+ section contains settings specific to Client and NamedClient instances; +custom+
# contains extra settings specific to your application.
# +options+ and +custom+ sections can be omitted, but settings for AWS keys are required.
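#
# As a rough sketch, a file along these lines would satisfy the keys the
# loader reads (all values are placeholders):
#
#   aws_access_key: ACCESSKEY
#   aws_secret_access_key: SECRETKEY
#   options:
#     use_ssl: false
#     dump_requests: true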
#
# Returns an array [aws_access_key, aws_secret_access_key, options], where +options+
# is a hash.
def Client.load_config(config_file)
config = YAML::load(ERB.new(IO.read(config_file)).result)
aws_access_key = config['aws_access_key']
aws_secret_access_key = config['aws_secret_access_key']
options = {}
options = S33r.keys_to_symbols(config['options']) if config['options']
[aws_access_key, aws_secret_access_key, options]
end
# Send a request over the wire.
#
# This method streams +data+ if it responds to the +stat+ method
# (as files do).
#
# Returns a Net::HTTPResponse instance.
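#
# For example (bucket and file names are placeholders; passing an open
# file handle triggers the streaming branch below):
#
#   File.open('/tmp/upload.dat') do |f|
#     response = client.do_request('PUT', '/mybucket/upload.dat', f)
#   end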
def do_request(method, path, data=nil, headers={})
req = get_requester(method, path)
req.chunk_size = @chunk_size
# Add the S3 headers which are always required.
headers = add_default_headers(headers)
# Add any client-specific default headers.
headers = add_client_headers(headers)
# Generate the S3 authorization header.
headers['Authorization'] = generate_auth_header_value(method, path, headers,
@aws_access_key, @aws_secret_access_key)
# Insert the headers into the request object.
headers.each do |key, value|
req[key] = value
end
# Add data to the request as a stream.
if req.request_body_permitted?
# Stream the body directly from a file handle. NB for string data,
# Net::HTTP sets Content-Length itself: this branch is only used
# when reading directly from a file.
if data.respond_to?(:stat)
req.body_stream = data
req['Content-Length'] = data.stat.size.to_s
# the body is streamed from the handle, so don't also pass it to request()
data = nil
end
else
data = nil
end
if @dump_requests
puts req.to_s
end
# Run the request.
client = get_client
client.start do
response = client.request(req, data)
# Check the response to see whether S3 is down.
response.check_s3_availability
response
end
end
# Return an instance of an appropriate request class.
def get_requester(method, path)
raise S33rException::UnsupportedHTTPMethod, "The #{method} HTTP method is not supported" \
unless METHOD_VERBS.include?(method)
# Map the verb onto its Net::HTTP request class (e.g. 'GET' => HTTP::Get);
# const_get avoids the quoting problems of building the constructor call with eval.
HTTP.const_get(method.capitalize).new(path)
end
# List all buckets.
#
# Returns an Array of NamedBucket instances.
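#
# For example, to print each bucket's name (relying on NamedBucket
# exposing +name+, as +list+ below does):
#
#   client.list_buckets.each {|bucket| puts bucket.name }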
def list_buckets
bucket_list_xml = do_get('/').body
doc = XML.get_xml_doc(S33r.remove_namespace(bucket_list_xml))
named_buckets = []
doc.find("//Bucket").to_a.each do |node|
bucket_name = node.xget('Name')
named_buckets << NamedBucket.new(@aws_access_key, @aws_secret_access_key,
{:default_bucket => bucket_name, :dump_requests => self.dump_requests})
end
named_buckets
end
# List just bucket names.
def list
list_buckets.map {|bucket| bucket.name}
end
# List entries in a bucket.
#
# +query_params+: hash of options on the bucket listing request, passed as querystring parameters to S3
# (see http://docs.amazonwebservices.com/AmazonS3/2006-03-01/).
# * :prefix => 'some_string': restrict results to keys beginning with 'some_string'
# * :marker => 'some_string': restrict results to keys occurring lexicographically after 'some_string'
# * :max_keys => 1000: return at most this number of keys (maximum possible value is 1000)
# * :delimiter => 'some_string': keys containing the same string between prefix and the delimiter
# are rolled up into a CommonPrefixes element inside the response
#
# NB if you pass a :marker, this takes up one of your :max_keys; so if you are fetching page
# two from a bucket, and you want 10 items, you need to set :max_keys to 11.
#
# To page through a bucket 10 keys at a time, you can do:
#
# resp, listing = list_bucket('mybucket', :max_keys => 10)
# resp, listing = list_bucket('mybucket', :max_keys => 11, :marker => listing.last_key)
# resp, listing = list_bucket('mybucket', :max_keys => 11, :marker => listing.last_key)
# etc.
#
# Note in the example code, +listing+ is a BucketListing instance; call its contents method
# to get a hash of the keys in the bucket, along with associated objects.
#
# Returns [raw_response, BucketListing instance].
def list_bucket(bucket_name, query_params={})
if query_params[:max_keys]
max_keys = query_params[:max_keys].to_i
raise S33rException::BucketListingMaxKeysError, "max_keys option to list bucket cannot be > #{BUCKET_LIST_MAX_MAX_KEYS}" \
if max_keys > BUCKET_LIST_MAX_MAX_KEYS
# convert the :max_keys option into the 'max-keys' querystring parameter
query_params['max-keys'] = query_params.delete(:max_keys)
end
resp = do_get("/#{bucket_name}" + generate_querystring(query_params))
[resp, BucketListing.new(resp.body)]
end
# Create a bucket.
#
# Returns true if response returned a 200 code; false otherwise.
def create_bucket(bucket_name, headers={})
resp = do_put("/#{bucket_name}", nil, headers)
resp.ok?
end
# Delete a bucket.
#
# +options+ hash can contain the following:
# * :force => true: delete all keys within the bucket then delete the bucket itself
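#
# E.g. to remove a bucket and everything in it (the bucket name is a placeholder):
#
#   client.delete_bucket('mybucket', {}, :force => true)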
#-- TODO: maybe delete keys matching a partial path
#-- TODO: if multiple pages of keys in buckets, need to get them by page.
def delete_bucket(bucket_name, headers={}, options={})
if true == options[:force]
_, bucket_listing = list_bucket(bucket_name)
bucket_listing.contents.each_value do |obj|
delete_resource(bucket_name, obj.key)
end
end
do_delete("/#{bucket_name}", headers)
end
# Check whether a bucket exists or not.
#
# Returns true if bucket exists.
def bucket_exists?(bucket_name)
resource_exists?(bucket_name)
end
# Fetch a resource.
def get_resource(bucket_name, resource_key, headers={})
do_get("/#{bucket_name}/#{resource_key}", headers)
end
# Check whether a bucket contains a key.
#
# Returns true if +resource_key+ exists inside +bucket_name+.
def resource_exists?(bucket_name, resource_key=nil)
path = "/#{bucket_name}"
path += "/#{resource_key}" unless resource_key.nil?
do_head(path).ok?
end
# Fetch an object.
#
# TODO: return S3Object
def get_object(bucket_name, resource_key, headers={})
get_resource(bucket_name, resource_key, headers)
end
# Fetch the ACL document for a resource.
#
# Raises a MissingResource exception if there is a problem
# with the resource (e.g. it doesn't exist).
def get_acl(bucket_name, resource_key='')
path = s3_acl_path(bucket_name, resource_key)
response = do_get(path)
if response.ok?
S3ACL::ACLDoc.from_xml(response.body)
else
raise S33rException::MissingResource, "Tried to get an ACL from a non-existent resource [#{path}]"
end
end
# Put the ACL document for a resource.
#
# +acl_doc+ is an S33r::S3ACL::ACLDoc instance.
#
# Returns true if response had a 200 code, false otherwise.
# If you get a 400 Bad Request back, it means a CanonicalUser
# could not be identified from the email address.
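#
# A typical round trip (bucket and key are placeholders) fetches an ACL,
# modifies it, then writes it back; this is what make_public does internally:
#
#   acl = client.get_acl('mybucket', 'README.txt')
#   acl.add_public_read_grants
#   client.set_acl(acl, 'mybucket', 'README.txt')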
def set_acl(acl_doc, bucket_name, resource_key='')
path = s3_acl_path(bucket_name, resource_key)
response = do_put(path, acl_doc.to_xml)
response.ok?
end
# Set up logging for a bucket and resource key.
#
# +logging_resource+ = a LoggingResource instance.
# +bucket_name+ = a bucket to log.
# +resource_key+ = a resource to log (if empty, logging
# gets added to the bucket).
def set_logging(logging_resource, bucket_name, resource_key='')
path = s3_logging_path(bucket_name, resource_key)
do_put(path, logging_resource.to_xml)
end
# Make a resource public (i.e. grant READ permissions
# to the AllUsers group type). NB a separate method is used
# on buckets, to make all of their content public too.
#
# Raises a MissingResource exception if the resource does not exist.
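#
# E.g. (bucket and key are placeholders):
#
#   client.make_public('mybucket', 'photo.jpg')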
def make_public(bucket_name, resource_key='')
acl = get_acl(bucket_name, resource_key)
if !acl.nil? and acl.add_public_read_grants
set_acl(acl, bucket_name, resource_key)
end
end
# TODO
def make_private
end
# Make a bucket capable of being a target for access logging.
#
# Returns true if the bucket is now a possible log target;
# false otherwise.
#
#-- TODO: tests
def enable_log_target(bucket_name)
acl = get_acl(bucket_name)
if acl.add_log_target_grants
set_acl(acl, bucket_name)
end
acl.log_targetable?
end
# Disable permissions for access logging into a bucket.
#
# Returns true if the bucket is no longer log targetable;
# false if it remains a log target.
#
#-- TODO: tests
def disable_log_target(bucket_name)
acl = get_acl(bucket_name)
acl.remove_log_target
set_acl(acl, bucket_name)
!acl.log_targetable?
end
# Enable logging for a resource (bucket or key).
#
# +log_prefix+ is the prefix for the logs.
# +bucket_name+ is the bucket to log.
# +log_bucket+ is the bucket to put logs into.
#
# options:
# * :for_key => 'key': the (optional) resource to log in the bucket
# (NB this is not currently supported by S3).
# * :prefix => 'prefix': the (optional) log file prefix
# (defaults to bucket_name + '-').
#
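# For example (bucket names are placeholders; the log bucket must be
# log targetable already, e.g. via enable_log_target):
#
#   client.enable_logging('mybucket', 'my-log-bucket', :prefix => 'mybucket-access-')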
def enable_logging(bucket_name, log_bucket=nil, options={})
log_bucket ||= @log_bucket
resource_key = options[:for_key]
resource_key ||= ''
log_prefix = options[:prefix]
log_prefix ||= bucket_name + '-'
log_bucket_acl = get_acl(log_bucket)
if !(log_bucket_acl.log_targetable?)
raise S33rException::BucketNotLogTargetable, "The bucket #{log_bucket} cannot be specified as a log target"
end
logging_resource = LoggingResource.new(log_bucket, log_prefix)
set_logging(logging_resource, bucket_name, resource_key)
end
# TODO
def disable_logging
end
# TODO
def get_logging
end
# Put some generic resource onto S3.
def put_resource(bucket_name, resource_key, data, headers={})
do_put("/#{bucket_name}/" + "#{CGI::escape(resource_key)}", data, headers)
end
# Put a string onto S3.
def put_text(string, bucket_name, resource_key, headers={})
headers["Content-Type"] = "text/plain"
put_resource(bucket_name, resource_key, string, headers)
end
# Put a file onto S3.
#
# If +resource_key+ is nil, the filename is used as the key instead.
#
# +headers+ sets some headers with the request; useful if you have an odd file type
# not recognised by the mimetypes library, and want to explicitly set the Content-Type header.
#
# +options+ hash simplifies setting some headers with specific meaning to S3:
# * :render_as_attachment => true: set the Content-Disposition for this file to "attachment" and set
# the default filename for saving the file (when accessed by a web browser) to +filename+; this
# turns the file into a download when opened in a browser, rather than trying to render it inline.
#
# Note that this method uses a handle to the file, so it can be streamed in chunks to S3.
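#
# For instance, to upload a PDF so that browsers offer it as a download
# (file, key and bucket names are placeholders):
#
#   client.put_file('/tmp/report.pdf', 'mybucket', 'docs/report.pdf', {},
#     :render_as_attachment => true)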
def put_file(filename, bucket_name, resource_key=nil, headers={}, options={})
# default to the file path as the resource key if none explicitly set
if resource_key.nil?
resource_key = filename
end
# set Content-Disposition header (quote the filename so names
# containing spaces survive)
if options[:render_as_attachment]
headers['Content-Disposition'] = "attachment; filename=\"#{File.basename(filename)}\""
end
# if the content type is explicitly set in the headers, use it to find the MIME type
if headers[:content_type]
# use the first MIME type corresponding to this content type string
# (MIME::Types returns an array of possible MIME types)
mime_type = MIME::Types[headers[:content_type]][0]
else
mime_type = guess_mime_type(filename)
end
content_type = mime_type.simplified
headers['Content-Type'] = content_type
headers['Content-Transfer-Encoding'] = 'binary' if mime_type.binary?
# the data we want to put (handle to file, so we can stream from it);
# open in binary mode so the data isn't mangled on platforms which
# distinguish binary from text reads
File.open(filename, 'rb') do |data|
# send the put request
put_resource(bucket_name, resource_key, data, headers)
end
end
# Delete a resource from S3.
#
# Note that S3 returns the same response code (204 No Content) regardless
# of whether the resource was successfully deleted, or didn't exist
# in the first place.
def delete_resource(bucket_name, resource_key, headers={})
do_delete("/#{bucket_name}/#{resource_key}", headers)
end
# Add any default headers which should be sent with every request from the client.
#
# +headers+ is a hash of headers already set up. Any headers passed in here
# override the defaults in +client_headers+.
#
# Returns +headers+ with the content of +client_headers+ merged in.
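#
# For example, a client set up to send an x-amz-acl header by default:
#
#   client.client_headers['x-amz-acl'] = 'public-read'
#
# will add that header to any request which doesn't set its own x-amz-acl.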
def add_client_headers(headers)
headers.merge!(client_headers) { |key, passed, default| passed }
end
def do_get(path='/', headers={})
do_request('GET', path, nil, headers)
end
def do_head(path='/', headers={})
do_request('HEAD', path, nil, headers)
end
def do_post(path='/', data=nil, headers={})
do_request('POST', path, data, headers)
end
def do_put(path='/', data=nil, headers={})
do_request('PUT', path, data, headers)
end
def do_delete(path, headers={})
do_request('DELETE', path, nil, headers)
end
end
end