require 'net/https'
require 'cgi'
require 'erb'
require 'yaml'

require File.join(File.dirname(__FILE__), 's3_acl')
require File.join(File.dirname(__FILE__), 's33r_exception')

module S33r
  include Net

  # The client performs operations over the network,
  # using the core to build request headers and content;
  # only client-specific headers are managed here: other headers
  # can be handled by the core.
  #--
  # TODO: use customisable thread pool for requests.
  # TODO: timeout on requests.
  #--
  class Client
    include S33r

    # S3 keys.
    attr_accessor :aws_access_key, :aws_secret_access_key

    # Size of data chunk to be sent per request when putting data.
    attr_accessor :chunk_size

    # Headers which should be sent with every request by default (unless overridden).
    attr_accessor :client_headers

    # Whether client should use SSL.
    attr_accessor :use_ssl

    # Whether client dumps headers from requests.
    attr_accessor :dump_requests

    # Default log bucket location.
    attr_accessor :log_bucket

    # Configure either an SSL-enabled or plain HTTP client.
    # (If using SSL, no verification of server certificate is performed.)
    #
    # +options+: hash of optional client config.:
    # * :use_ssl => false: only use plain HTTP for connections
    # * :dump_requests => true: dump each request's initial line and headers to STDOUT
    def initialize(aws_access_key, aws_secret_access_key, options={})
      @use_ssl = true
      @use_ssl = false if (false == options[:use_ssl])

      @dump_requests = (true == options[:dump_requests])

      # set default chunk size for streaming request body
      @chunk_size = DEFAULT_CHUNK_SIZE

      @log_bucket = options[:log_bucket]

      # Amazon S3 developer keys
      @aws_access_key = aws_access_key
      @aws_secret_access_key = aws_secret_access_key

      # headers sent with every request made by this client
      @client_headers = {}
    end

    # Get an HTTP client instance.
    #
    # NB this has been moved here so that client instances are
    # only instantiated when needed (so Client can be used
    # as an empty shell when list_buckets is called).
    def get_client
      if @use_ssl
        client = HTTP.new(HOST, PORT)
        # turn off SSL certificate verification
        client.verify_mode = OpenSSL::SSL::VERIFY_NONE
        client.use_ssl = true
      else
        client = HTTP.new(HOST, NON_SSL_PORT)
        client.use_ssl = false
      end
      client
    end

    # Initialise client from YAML configuration file
    # (see load_config method for details of acceptable format).
    def Client.init(config_file)
      aws_access_key, aws_secret_access_key, options = load_config(config_file)
      Client.new(aws_access_key, aws_secret_access_key, options)
    end

    # Load YAML config. file for a client. The config. file looks like this:
    #
    # :include: test/files/namedbucket_config.yml
    #
    # Note that the loader also runs the config. file through ERB, so you can
    # add dynamic blocks of ERB (Ruby) code into your files.
    #
    # The +options+ section contains settings specific to Client and NamedClient instances; +custom+
    # contains extra settings specific to your application.
    # +options+ and +custom+ sections can be omitted, but settings for AWS keys are required.
    #
    # Returns an array [aws_access_key, aws_secret_access_key, options], where +options+
    # is a hash.
    def Client.load_config(config_file)
      config = YAML::load(ERB.new(IO.read(config_file)).result)
      aws_access_key = config['aws_access_key']
      aws_secret_access_key = config['aws_secret_access_key']

      options = {}
      options = S33r.keys_to_symbols(config['options']) if config['options']

      [aws_access_key, aws_secret_access_key, options]
    end
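
    # For orientation, a client might be created like this (the key values
    # and config. file path below are placeholders, not working credentials):
    #
    #   client = S33r::Client.new('myaccesskey', 'mysecretkey', :use_ssl => false)
    #
    #   # or, equivalently, from a YAML config. file:
    #   client = S33r::Client.init('/path/to/config.yml')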

    # Send a request over the wire.
    #
    # This method streams +data+ if it responds to the +stat+ method
    # (as files do).
    #
    # Returns a Net::HTTPResponse instance.
    def do_request(method, path, data=nil, headers={})
      req = get_requester(method, path)

      req.chunk_size = @chunk_size

      # Add the S3 headers which are always required.
      headers = add_default_headers(headers)

      # Add any client-specific default headers.
      headers = add_client_headers(headers)

      # Generate the S3 authorization header.
      headers['Authorization'] = generate_auth_header_value(method, path, headers,
        @aws_access_key, @aws_secret_access_key)

      # Insert the headers into the request object.
      headers.each do |key, value|
        req[key] = value
      end

      # Add data to the request as a stream.
      if req.request_body_permitted?
        # For streaming files; NB Content-Length will be set by Net::HTTP
        # for character-based data: this section of code is only used
        # when reading directly from a file.
        if data.respond_to?(:stat)
          req.body_stream = data
          req['Content-Length'] = data.stat.size.to_s
          data = nil
        end
      else
        data = nil
      end

      if @dump_requests
        puts req.to_s
      end

      # Run the request.
      client = get_client
      client.start do
        response = client.request(req, data)
        # Check the response to see whether S3 is down.
        response.check_s3_availability
        response
      end
    end

    # Return an instance of an appropriate request class.
    def get_requester(method, path)
      raise S33rException::UnsupportedHTTPMethod,
        "The #{method} HTTP method is not supported" unless METHOD_VERBS.include?(method)
      # Look the request class up by name (e.g. 'GET' => HTTP::Get) rather
      # than building a string for eval: this avoids breakage if +path+
      # contains quote characters.
      HTTP.const_get(method.capitalize).new(path)
    end

    # List all buckets.
    #
    # Returns an Array of NamedBucket instances.
    def list_buckets
      bucket_list_xml = do_get('/').body
      doc = XML.get_xml_doc(S33r.remove_namespace(bucket_list_xml))

      named_buckets = []
      doc.find("//Bucket").to_a.each do |node|
        bucket_name = node.xget('Name')
        named_buckets << NamedBucket.new(@aws_access_key, @aws_secret_access_key,
          {:default_bucket => bucket_name, :dump_request => self.dump_requests})
      end

      named_buckets
    end

    # List just bucket names.
    def list
      list_buckets.map {|bucket| bucket.name}
    end
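
    # As an illustration, with a client set up as above (bucket names
    # here are invented):
    #
    #   client.list_buckets.each {|bucket| puts bucket.name }
    #   client.list   #=> ["logs", "photos"]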

    # List entries in a bucket.
    #
    # +query_params+: hash of options on the bucket listing request, passed as querystring parameters to S3
    # (see http://docs.amazonwebservices.com/AmazonS3/2006-03-01/).
    # * :prefix => 'some_string': restrict results to keys beginning with 'some_string'
    # * :marker => 'some_string': restrict results to keys occurring lexicographically after 'some_string'
    # * :max_keys => 1000: return at most this number of keys (maximum possible value is 1000)
    # * :delimiter => 'some_string': keys containing the same string between prefix and the delimiter
    #   are rolled up into a CommonPrefixes element inside the response
    #
    # NB if you pass a :marker, this takes up one of your :max_keys; so if you are fetching page
    # two from a bucket, and you want 10 items, you need to set :max_keys to 11.
    #
    # To page through a bucket 10 keys at a time, you can do:
    #
    #   resp, listing = list_bucket('mybucket', :max_keys => 10)
    #   resp, listing = list_bucket('mybucket', :max_keys => 11, :marker => listing.last_key)
    #   resp, listing = list_bucket('mybucket', :max_keys => 11, :marker => listing.last_key)
    #   etc.
    #
    # Note in the example code, +listing+ is a BucketListing instance; call its contents method
    # to get a hash of the keys in the bucket, along with associated objects.
    #
    # Returns [raw_response, BucketListing instance].
    def list_bucket(bucket_name, query_params={})
      if query_params[:max_keys]
        max_keys = query_params[:max_keys].to_i
        raise S33rException::BucketListingMaxKeysError, "max_keys option to list bucket cannot be > #{BUCKET_LIST_MAX_MAX_KEYS}" \
          if max_keys > BUCKET_LIST_MAX_MAX_KEYS

        # convert the :max_keys option to the 'max-keys' querystring parameter
        query_params['max-keys'] = query_params.delete(:max_keys)
      end

      resp = do_get("/#{bucket_name}" + generate_querystring(query_params))

      [resp, BucketListing.new(resp.body)]
    end

    # Create a bucket.
    #
    # Returns true if response returned a 200 code; false otherwise.
    def create_bucket(bucket_name, headers={})
      resp = do_put("/#{bucket_name}", nil, headers)
      resp.ok?
    end

    # Delete a bucket.
    #
    # +options+ hash can contain the following:
    # * :force => true: delete all keys within the bucket then delete the bucket itself
    #-- TODO: maybe delete keys matching a partial path
    #-- TODO: if multiple pages of keys in buckets, need to get them by page.
    def delete_bucket(bucket_name, headers={}, options={})
      if true == options[:force]
        _, bucket_listing = list_bucket(bucket_name)
        bucket_listing.contents.each_value do |obj|
          delete_resource(bucket_name, obj.key)
        end
      end

      do_delete("/#{bucket_name}", headers)
    end

    # Check whether a bucket exists or not.
    #
    # Returns true if bucket exists.
    def bucket_exists?(bucket_name)
      resource_exists?(bucket_name)
    end

    # Fetch a resource.
    def get_resource(bucket_name, resource_key, headers={})
      do_get("/#{bucket_name}/#{resource_key}", headers)
    end

    # Check whether a bucket contains a key.
    #
    # Returns true if resource_key exists inside bucket_name.
    def resource_exists?(bucket_name, resource_key=nil)
      path = "/#{bucket_name}"
      path += "/#{resource_key}" unless resource_key.nil?
      do_head(path).ok?
    end

    # Fetch an object.
    #
    # TODO: return S3Object
    def get_object(bucket_name, resource_key, headers={})
      get_resource(bucket_name, resource_key, headers)
    end

    # Fetch the ACL document for a resource.
    #
    # Raises a MissingResource exception if there is a problem with
    # the resource (e.g. it doesn't exist).
    def get_acl(bucket_name, resource_key='')
      path = s3_acl_path(bucket_name, resource_key)
      response = do_get(path)
      if response.ok?
        S3ACL::ACLDoc.from_xml(response.body)
      else
        raise S33rException::MissingResource,
          "Tried to get an ACL from a non-existent resource [#{path}]"
      end
    end

    # Put the ACL document for a resource.
    #
    # +acl_doc+ is an S33r::S3ACL::ACLDoc instance.
    #
    # Returns true if response had a 200 code, false otherwise.
    # If you get a 400 Bad Request back, it means a CanonicalUser
    # could not be identified from the email address.
    def set_acl(acl_doc, bucket_name, resource_key='')
      path = s3_acl_path(bucket_name, resource_key)
      response = do_put(path, acl_doc.to_xml)
      response.ok?
    end
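
    # A typical ACL round trip might look like this (bucket and key names
    # are made up):
    #
    #   acl = client.get_acl('mybucket', 'some_key')
    #   acl.add_public_read_grants
    #   client.set_acl(acl, 'mybucket', 'some_key')
    #
    # This is the pattern which make_public (below) wraps up for you.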

    # Set up logging for a bucket and resource key.
    #
    # +logging_resource+ = a LoggingResource instance.
    # +bucket_name+ = a bucket to log.
    # +resource_key+ = a resource to log (if empty, logging
    # gets added to the bucket).
    def set_logging(logging_resource, bucket_name, resource_key='')
      path = s3_logging_path(bucket_name, resource_key)
      do_put(path, logging_resource.to_xml)
    end

    # Make a resource public (i.e. grant READ permissions
    # to the AllUsers group type). NB separate method is used
    # on buckets, to make all of their content public too.
    #
    # Raises a MissingResource exception if the resource does not exist.
    def make_public(bucket_name, resource_key='')
      acl = get_acl(bucket_name, resource_key)
      if !acl.nil? and acl.add_public_read_grants
        set_acl(acl, bucket_name, resource_key)
      end
    end

    # TODO
    def make_private
    end

    # Make a bucket capable of being a target for access logging.
    #
    # Returns true if the bucket is now a possible log target;
    # false otherwise.
    #
    #-- TODO: tests
    def enable_log_target(bucket_name)
      acl = get_acl(bucket_name)
      if acl.add_log_target_grants
        set_acl(acl, bucket_name)
      end
      acl.log_targetable?
    end

    # Disable permissions for access logging into a bucket.
    #
    # Returns true if the bucket is no longer log targetable;
    # false if it remains a log target.
    #
    #-- TODO: tests
    def disable_log_target(bucket_name)
      acl = get_acl(bucket_name)
      acl.remove_log_target
      set_acl(acl, bucket_name)
      !acl.log_targetable?
    end

    # Enable logging for a resource (bucket or key).
    #
    # +bucket_name+ is the bucket to log.
    # +log_bucket+ is the bucket to put logs into (defaults to the
    # client's +log_bucket+ setting).
    #
    # options:
    # +:for_key => 'key'+ is the (optional) resource to log in the bucket
    # (NB this is not currently supported by S3).
    # +:prefix => 'prefix'+ is the (optional) log file prefix
    # (defaults to bucket_name + '-').
    def enable_logging(bucket_name, log_bucket=nil, options={})
      log_bucket ||= @log_bucket

      resource_key = options[:for_key] || ''
      log_prefix = options[:prefix] || bucket_name + '-'

      log_bucket_acl = get_acl(log_bucket)

      if !(log_bucket_acl.log_targetable?)
        raise S33rException::BucketNotLogTargetable,
          "The bucket #{log_bucket} cannot be specified as a log target"
      end

      logging_resource = LoggingResource.new(log_bucket, log_prefix)
      set_logging(logging_resource, bucket_name, resource_key)
    end

    # TODO
    def disable_logging
    end

    # TODO
    def get_logging
    end

    # Put some generic resource onto S3.
    def put_resource(bucket_name, resource_key, data, headers={})
      do_put("/#{bucket_name}/#{CGI::escape(resource_key)}", data, headers)
    end

    # Put a string onto S3.
    def put_text(string, bucket_name, resource_key, headers={})
      headers["Content-Type"] = "text/plain"
      put_resource(bucket_name, resource_key, string, headers)
    end
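
    # For example, to store a string and read it back (bucket and key
    # names are illustrative):
    #
    #   client.put_text('hello', 'mybucket', 'greeting.txt')
    #   client.get_resource('mybucket', 'greeting.txt').body   #=> "hello"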

    # Put a file onto S3.
    #
    # If +resource_key+ is nil, the filename is used as the key instead.
    #
    # +headers+ sets some headers with the request; useful if you have an odd file type
    # not recognised by the mimetypes library, and want to explicitly set the Content-Type header.
    #
    # +options+ hash simplifies setting some headers with specific meaning to S3:
    # * :render_as_attachment => true: set the Content-Disposition for this file to "attachment" and set
    #   the default filename for saving the file (when accessed by a web browser) to +filename+; this
    #   turns the file into a download when opened in a browser, rather than trying to render it inline.
    #
    # Note that this method uses a handle to the file, so it can be streamed in chunks to S3.
    def put_file(filename, bucket_name, resource_key=nil, headers={}, options={})
      # default to the file path as the resource key if none explicitly set
      if resource_key.nil?
        resource_key = filename
      end

      # set Content-Disposition header
      if options[:render_as_attachment]
        headers['Content-Disposition'] = "attachment; filename=#{File.basename(filename)}"
      end

      # content type is explicitly set in the headers, so apply to request
      if headers[:content_type]
        # use the first MIME type corresponding to this content type string
        # (MIME::Types returns an array of possible MIME types);
        # remove the pseudo-header so it isn't sent to S3 verbatim
        mime_type = MIME::Types[headers.delete(:content_type)][0]
      else
        mime_type = guess_mime_type(filename)
      end

      content_type = mime_type.simplified
      headers['Content-Type'] = content_type
      headers['Content-Transfer-Encoding'] = 'binary' if mime_type.binary?

      # the data we want to put (handle to file, so we can stream from it)
      File.open(filename, 'rb') do |data|
        # send the put request
        put_resource(bucket_name, resource_key, data, headers)
      end
    end

    # Delete a resource from S3.
    #
    # Note that S3 returns the same response code (204 No Content) regardless
    # of whether the resource was successfully deleted, or didn't exist
    # in the first place.
    def delete_resource(bucket_name, resource_key, headers={})
      do_delete("/#{bucket_name}/#{resource_key}", headers)
    end

    # Add any default headers which should be sent with every request from the client.
    #
    # +headers+ is a hash of headers already set up. Any headers passed in here
    # override the defaults in +client_headers+.
    #
    # Returns +headers+ with the content of +client_headers+ merged in.
    def add_client_headers(headers)
      headers.merge!(client_headers) { |key, arg, default| arg }
    end

    def do_get(path='/', headers={})
      do_request('GET', path, nil, headers)
    end

    def do_head(path='/', headers={})
      do_request('HEAD', path, nil, headers)
    end

    def do_post(path='/', data=nil, headers={})
      do_request('POST', path, data, headers)
    end

    def do_put(path='/', data=nil, headers={})
      do_request('PUT', path, data, headers)
    end

    def do_delete(path, headers={})
      do_request('DELETE', path, nil, headers)
    end

  end
end
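
# An end-to-end sketch of a session with this client (bucket, key and
# file names are all hypothetical):
#
#   client = S33r::Client.init('config.yml')
#   client.create_bucket('mybucket')
#   client.put_file('/tmp/report.pdf', 'mybucket', 'report.pdf')
#   client.resource_exists?('mybucket', 'report.pdf')   #=> true
#   client.delete_resource('mybucket', 'report.pdf')
#   client.delete_bucket('mybucket')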