lib/s33r/client.rb in s33r-0.3.1 vs lib/s33r/client.rb in s33r-0.4

- old
+ new

@@ -1,7 +1,11 @@
 require 'net/https'
 require 'cgi'
+require 'erb'
+require 'yaml'
+require File.join(File.dirname(__FILE__), 's3_acl')
+require File.join(File.dirname(__FILE__), 's33r_exception')

 module S33r
   include Net

   # The client performs operations over the network,
@@ -13,109 +17,133 @@
   # TODO: timeout on requests.
   #--
   class Client
     include S33r

+    # S3 keys.
     attr_accessor :aws_access_key, :aws_secret_access_key

     # Size of data chunk to be sent per request when putting data.
     attr_accessor :chunk_size

     # Headers which should be sent with every request by default (unless overridden).
     attr_accessor :client_headers
+
+    # Whether client should use SSL.
+    attr_accessor :use_ssl
+
+    # Whether client dumps headers from requests.
+    attr_accessor :dump_requests
+
+    # Default log bucket location.
+    attr_accessor :log_bucket

     # Configure either an SSL-enabled or plain HTTP client.
     # (If using SSL, no verification of server certificate is performed.)
     #
     # +options+: hash of optional client config.:
     # * <tt>:use_ssl => false</tt>: only use plain HTTP for connections
     # * <tt>:dump_requests => true</tt>: dump each request's initial line and headers to STDOUT
     def initialize(aws_access_key, aws_secret_access_key, options={})
-      if false == options[:use_ssl]
-        @client = HTTP.new(HOST, NON_SSL_PORT)
-        @client.use_ssl = false
-      else
-        @client = HTTP.new(HOST, PORT)
-        # turn off SSL certificate verification
-        @client.verify_mode = OpenSSL::SSL::VERIFY_NONE
-        @client.use_ssl = true
-      end
+      @use_ssl = true
+      @use_ssl = false if (false == options[:use_ssl])

       @dump_requests = (true == options[:dump_requests])

       # set default chunk size for streaming request body
       @chunk_size = DEFAULT_CHUNK_SIZE
+
+      @log_bucket = options[:log_bucket]

       # Amazon S3 developer keys
       @aws_access_key = aws_access_key
       @aws_secret_access_key = aws_secret_access_key

       # headers sent with every request made by this client
       @client_headers = {}
     end

+    # Get an HTTP client instance.
+    #
+    # NB this has been moved here so that client instances are
+    # only instantiated when needed (so Client can be used
+    # as an empty shell when list_buckets is called).
+    def get_client
+      if @use_ssl
+        client = HTTP.new(HOST, PORT)
+        # turn off SSL certificate verification
+        client.verify_mode = OpenSSL::SSL::VERIFY_NONE
+        client.use_ssl = true
+      else
+        client = HTTP.new(HOST, NON_SSL_PORT)
+        client.use_ssl = false
+      end
+
+      client
+    end

     # Initialise client from YAML configuration file
     # (see load_config method for details of acceptable format).
     def Client.init(config_file)
-      aws_access_key, aws_secret_access_key, options, _ = load_config(config_file)
+      aws_access_key, aws_secret_access_key, options = load_config(config_file)
       Client.new(aws_access_key, aws_secret_access_key, options)
     end

     # Load YAML config. file for a client. The config. file looks like this:
     #
     # :include: test/files/namedbucket_config.yml
     #
+    # Note that the loader also runs the config. file through ERB, so you can
+    # add dynamic blocks of ERB (Ruby) code into your files.
+    #
     # The +options+ section contains settings specific to Client and NamedClient instances; +custom+
     # contains extra settings specific to your application.
     # +options+ and +custom+ sections can be omitted, but settings for AWS keys are required.
     #
-    # Returns an array <tt>[aws_access_key, aws_secret_access_key, options, custom]</tt>, where +options+
-    # and +custom+ are hashes.
+    # Returns an array <tt>[aws_access_key, aws_secret_access_key, options]</tt>, where +options+
+    # is a hash.
     def Client.load_config(config_file)
-      require 'yaml'
-      config = YAML::load_file(config_file)
+      config = YAML::load(ERB.new(IO.read(config_file)).result)

       aws_access_key = config['aws_access_key']
       aws_secret_access_key = config['aws_secret_access_key']

       options = {}
       options = S33r.keys_to_symbols(config['options']) if config['options']

-      custom = {}
-      custom = S33r.keys_to_symbols(config['custom']) if config['custom']
-
-      [aws_access_key, aws_secret_access_key, options, custom]
+      [aws_access_key, aws_secret_access_key, options]
     end

-    # Wrapper round embedded client +use_ssl+ accessor.
-    def use_ssl?
-      @client.use_ssl
-    end
-
     # Send a request over the wire.
     #
     # This method streams +data+ if it responds to the +stat+ method
     # (as files do).
+    #
+    # Returns a Net::HTTPResponse instance.
     def do_request(method, path, data=nil, headers={})
       req = get_requester(method, path)

       req.chunk_size = @chunk_size

-      # add the S3 headers which are always required
+      # Add the S3 headers which are always required.
       headers = add_default_headers(headers)

-      # add any client-specific default headers
+      # Add any client-specific default headers.
       headers = add_client_headers(headers)

+      # Generate the S3 authorization header.
       headers['Authorization'] = generate_auth_header_value(method, path, headers,
         @aws_access_key, @aws_secret_access_key)

+      # Insert the headers into the request object.
       headers.each do |key, value|
         req[key] = value
       end

+      # Add data to the request as a stream.
       if req.request_body_permitted?
-        # for streaming files; NB Content-Length will be set by Net::HTTP
-        # for character-based body content
+        # For streaming files; NB Content-Length will be set by Net::HTTP
+        # for character-based data: this section of code is only used
+        # when reading directly from a file.
         if data.respond_to?(:stat)
           req.body_stream = data
           req['Content-Length'] = data.stat.size.to_s
           data = nil
         end
@@ -125,13 +153,18 @@
       if @dump_requests
         puts req.to_s
       end

-      @client.start do
-        response = @client.request(req, data)
+      # Run the request.
+      client = get_client
+      client.start do
+        response = client.request(req, data)
+
+        # Check the response to see whether S3 is down.
         response.check_s3_availability
+        response
       end
     end

     # Return an instance of an appropriate request class.
@@ -139,49 +172,85 @@
       raise S33rException::UnsupportedHTTPMethod,
         "The #{method} HTTP method is not supported" if !(METHOD_VERBS.include?(method))
       eval("HTTP::" + method[0,1].upcase + method[1..-1].downcase + ".new('#{path}')")
     end

     # List all buckets.
+    #
+    # Returns an Array of NamedBucket instances.
     def list_buckets
-      do_get('/')
+      bucket_list_xml = do_get('/').body
+      doc = XML.get_xml_doc(S33r.remove_namespace(bucket_list_xml))
+
+      named_buckets = []
+
+      doc.find("//Bucket").to_a.each do |node|
+        bucket_name = node.xget('Name')
+        named_buckets << NamedBucket.new(@aws_access_key, @aws_secret_access_key,
+          {:default_bucket => bucket_name, :dump_request => self.dump_requests})
+      end
+
+      named_buckets
     end
+
+    # List just bucket names.
+    def list
+      list_buckets.map {|bucket| bucket.name}
+    end

     # List entries in a bucket.
     #
     # +query_params+: hash of options on the bucket listing request, passed as querystring parameters to S3
     # (see http://docs.amazonwebservices.com/AmazonS3/2006-03-01/).
     # * <tt>:prefix => 'some_string'</tt>: restrict results to keys beginning with 'some_string'
     # * <tt>:marker => 'some_string'</tt>: restrict results to keys occurring lexicographically after 'some_string'
     # * <tt>:max_keys => 1000</tt>: return at most this number of keys (maximum possible value is 1000)
     # * <tt>:delimiter => 'some_string'</tt>: keys containing the same string between prefix and the delimiter
     #   are rolled up into a CommonPrefixes element inside the response
+    #
+    # NB if you pass a :marker, this takes up one of your :max_keys; so if you are fetching page
+    # two from a bucket, and you want 10 items, you need to set :max_keys to 11.
+    #
+    # To page through a bucket 10 keys at a time, you can do:
+    #
+    #   resp, listing = list_bucket('mybucket', :max_keys => 10)
+    #   resp, listing = list_bucket('mybucket', :max_keys => 11, :marker => listing.last_key)
+    #   resp, listing = list_bucket('mybucket', :max_keys => 11, :marker => listing.last_key)
+    #   etc.
+    #
+    # Note in the example code, +listing+ is a BucketListing instance; call its contents method
+    # to get a hash of the keys in the bucket, along with associated objects.
+    #
+    # Returns [raw_response, BucketListing instance].
     def list_bucket(bucket_name, query_params={})
       if query_params[:max_keys]
         max_keys = query_params[:max_keys].to_i

         raise S33rException::BucketListingMaxKeysError,
         "max_keys option to list bucket cannot be > #{BUCKET_LIST_MAX_MAX_KEYS}" \
           if max_keys > BUCKET_LIST_MAX_MAX_KEYS

-        # take out the max_keys parameter and move it to max-keys
+        # convert the :max_keys parameter to the max-keys querystring parameter
         query_params['max-keys'] = query_params.delete(:max_keys)
       end

       resp = do_get("/#{bucket_name}" + generate_querystring(query_params))

-      bucket_listing = BucketListing.new(resp.body)
-
-      [resp, bucket_listing]
+      [resp, BucketListing.new(resp.body)]
     end

     # Create a bucket.
+    #
+    # Returns true if response returned a 200 code; false otherwise.
     def create_bucket(bucket_name, headers={})
-      do_put("/#{bucket_name}", nil, headers)
+      resp = do_put("/#{bucket_name}", nil, headers)
+      resp.ok?
     end

     # Delete a bucket.
     #
     # +options+ hash can contain the following:
     # * <tt>:force => true</tt>: delete all keys within the bucket then delete the bucket itself
     #-- TODO: maybe delete keys matching a partial path
+    #-- TODO: if multiple pages of keys in buckets, need to get them by page.
     def delete_bucket(bucket_name, headers={}, options={})
       if true == options[:force]
         _, bucket_listing = list_bucket(bucket_name)

         bucket_listing.contents.each_value do |obj|
           delete_resource(bucket_name, obj.key)
@@ -189,29 +258,155 @@
         end
       end

       do_delete("/#{bucket_name}", headers)
     end

+    # Check whether a bucket exists or not.
+    #
     # Returns true if bucket exists.
     def bucket_exists?(bucket_name)
-      do_head("/#{bucket_name}").ok?
+      resource_exists?(bucket_name)
     end

-    # Fetch head info for a key in a bucket.
-    def head_resource(bucket_name, resource_key, headers={})
-      do_head("/#{bucket_name}/#{resource_key}", headers)
-    end
-
     # Fetch a resource.
     def get_resource(bucket_name, resource_key, headers={})
       do_get("/#{bucket_name}/#{resource_key}", headers)
     end

+    # Check whether a bucket contains a key.
+    #
+    # Returns true if resource_key exists inside bucket_name.
+    def resource_exists?(bucket_name, resource_key=nil)
+      path = "/#{bucket_name}"
+      path += "/#{resource_key}" unless resource_key.nil?
+      do_head(path).ok?
+    end
+
+    # Fetch an object.
+    #
     # TODO: return S3Object
     def get_object(bucket_name, resource_key, headers)
       response = get_resource(bucket_name, resource_key, headers)
     end

+    # Fetch the ACL document for a resource.
+    #
+    # Raises S33rException::MissingResource if there is a problem
+    # with the resource (e.g. it doesn't exist).
+    def get_acl(bucket_name, resource_key='')
+      path = s3_acl_path(bucket_name, resource_key)
+      response = do_get(path)
+      if response.ok?
+        S3ACL::ACLDoc.from_xml(response.body)
+      else
+        raise S33rException::MissingResource, "Tried to get an ACL from a non-existent resource [#{path}]"
+      end
+    end
+
+    # Put the ACL document for a resource.
+    #
+    # +acl_doc+ is an S33r::S3ACL::ACLDoc instance.
+    #
+    # Returns true if response had a 200 code, false otherwise.
+    # If you get a 400 Bad Request back, it means a CanonicalUser
+    # could not be identified from the email address.
+    def set_acl(acl_doc, bucket_name, resource_key='')
+      path = s3_acl_path(bucket_name, resource_key)
+      response = do_put(path, acl_doc.to_xml)
+      response.ok?
+    end
+
+    # Set up logging for a bucket and resource key.
+    #
+    # +logging_resource+ = a LoggingResource instance.
+    # +bucket_name+ = a bucket to log.
+    # +resource_key+ = a resource to log (if empty, logging
+    # gets added to the bucket).
+    def set_logging(logging_resource, bucket_name, resource_key='')
+      path = s3_logging_path(bucket_name, resource_key)
+      response = do_put(path, logging_resource.to_xml)
+    end
+
+    # Make a resource public (i.e. grant READ permissions
+    # to the AllUsers group type). NB a separate method is used
+    # on buckets, to make all of their content public too.
+    #
+    # Raises S33rException::MissingResource (via get_acl) if the
+    # resource does not exist.
+    def make_public(bucket_name, resource_key='')
+      acl = get_acl(bucket_name, resource_key)
+      if !acl.nil? and acl.add_public_read_grants
+        set_acl(acl, bucket_name, resource_key)
+      end
+    end
+
+    # TODO
+    def make_private
+    end
+
+    # Make a bucket capable of being a target for access logging.
+    #
+    # Returns true if the bucket is now a possible log target;
+    # false otherwise.
+    #
+    #-- TODO: tests
+    def enable_log_target(bucket_name)
+      acl = get_acl(bucket_name)
+      if acl.add_log_target_grants
+        set_acl(acl, bucket_name)
+      end
+      acl.log_targetable?
+    end
+
+    # Disable permissions for access logging into a bucket.
+    #
+    # Returns true if the bucket is no longer log targetable;
+    # false if it remains a log target.
+    #
+    #-- TODO: tests
+    def disable_log_target(bucket_name)
+      acl = get_acl(bucket_name)
+      acl.remove_log_target
+      set_acl(acl, bucket_name)
+      !acl.log_targetable?
+    end

+    # Enable logging for a resource (bucket or key).
+    #
+    # +bucket_name+ is the bucket to log.
+    # +log_bucket+ is the bucket to put logs into (defaults to the
+    # client's +log_bucket+ attribute).
+    #
+    # options:
+    # +:for_key => 'key'+ is the (optional) resource to log in the bucket
+    # (NB this is not currently supported by S3).
+    # +:prefix => 'prefix'+ is the (optional) log file prefix
+    # (defaults to bucket_name + '-')
+    def enable_logging(bucket_name, log_bucket=nil, options={})
+      log_bucket ||= @log_bucket

+      resource_key = options[:for_key]
+      resource_key ||= ''

+      log_prefix = options[:prefix]
+      log_prefix ||= bucket_name + '-'

+      log_bucket_acl = get_acl(log_bucket)
+      if !(log_bucket_acl.log_targetable?)
+        raise BucketNotLogTargetable, "The bucket #{log_bucket} cannot be specified as a log target"
+      end
+      logging_resource = LoggingResource.new(log_bucket, log_prefix)
+      set_logging(logging_resource, bucket_name, resource_key)
+    end
+
+    # TODO
+    def disable_logging
+    end
+
+    # TODO
+    def get_logging
+    end

     # Put some generic resource onto S3.
     def put_resource(bucket_name, resource_key, data, headers={})
       do_put("/#{bucket_name}/" + "#{CGI::escape(resource_key)}", data, headers)
     end
@@ -264,10 +459,14 @@
         put_resource(bucket_name, resource_key, data, headers)
       end
     end

     # Delete a resource from S3.
+    #
+    # Note that S3 returns the same response code regardless
+    # of whether the resource was successfully deleted, or didn't exist
+    # in the first place.
     def delete_resource(bucket_name, resource_key, headers={})
       do_delete("/#{bucket_name}/#{resource_key}", headers)
     end

     # Add any default headers which should be sent with every request from the client.
@@ -278,12 +477,11 @@
     # Returns +headers+ with the content of +client_headers+ merged in.
     def add_client_headers(headers)
       headers.merge!(client_headers) { |key, arg, default| arg }
     end
-
     protected

     def do_get(path='/', headers={})
-      do_request('GET', path, headers)
+      do_request('GET', path, nil, headers)
     end

     def do_head(path='/', headers={})
       do_request('HEAD', path, nil, headers)
     end
\ No newline at end of file
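
Usage notes on the 0.4 changes (the examples below are editor's sketches, not code shipped with the gem):

The new Client.load_config runs the YAML file through ERB before parsing, and reads the 'aws_access_key', 'aws_secret_access_key' and 'options' keys (option keys are converted to symbols). A minimal config file consistent with that loader might look like this; the ERB line and all values are hypothetical:

    aws_access_key: YOUR_ACCESS_KEY
    aws_secret_access_key: <%= ENV['S3_SECRET'] %>
    options:
      use_ssl: false
      dump_requests: true
      log_bucket: my-log-bucket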
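Client.init builds a client from such a file, and the new list/list_buckets calls behave as documented in the comments above. A sketch, assuming 'config.yml' exists and 'mybucket' is a placeholder bucket name (last_key is the BucketListing accessor used in the paging comment above):

    require 's33r'

    client = S33r::Client.init('config.yml')

    # Bucket names only; list_buckets returns NamedBucket instances.
    puts client.list

    # Page through a bucket 10 keys at a time; the :marker consumes
    # one of the :max_keys, hence 11 on subsequent pages.
    resp, listing = client.list_bucket('mybucket', :max_keys => 10)
    resp, listing = client.list_bucket('mybucket', :max_keys => 11,
      :marker => listing.last_key)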
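The ACL and logging additions compose: enable_logging raises unless the target bucket is already log targetable, so enable_log_target must succeed first. A sketch under the same assumptions ('mybucket' and 'logbucket' are placeholders):

    # Grant READ to the AllUsers group on one key.
    client.make_public('mybucket', 'some_key')

    # Give 'logbucket' the log-target grants, then route
    # access logs for 'mybucket' into it.
    client.enable_log_target('logbucket')
    client.enable_logging('mybucket', 'logbucket', :prefix => 'mybucket-')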