Sha256: 125f92d2cda67908592e6092711b6a9361eec06a54425842ab35612f8aedbd28
Contents?: true
Size: 1.44 KB
Versions: 4
Compression:
Stored size: 1.44 KB
Contents
require 'aws-sdk-s3'
require 'cgi'
require 'uri'

# Purpose: search through a directory on S3 for a specified file pattern.
module S3Grep
  # Enumerates the objects stored under an S3 URL (s3://bucket/prefix) and
  # yields each one back as a fully qualified, URL-escaped s3:// URL.
  class Directory
    # Page size requested from S3 on every list_objects call.
    MAX_KEYS = 1_000

    attr_reader :s3_url, :aws_s3_client

    # @param s3_url [String] location to list, e.g. "s3://bucket/some/prefix"
    # @param aws_s3_client [#list_objects] client used to talk to S3
    def initialize(s3_url, aws_s3_client)
      @s3_url = s3_url
      @aws_s3_client = aws_s3_client
    end

    # Convenience wrapper: builds a Directory and globs it in one call.
    #
    # @param s3_url [String] location to list
    # @param aws_s3_client [#list_objects] client used to talk to S3
    # @param regex [Regexp, String] pattern each object URL is matched against
    # @yieldparam s3_file [String] matching object URL
    # @return [Enumerator, nil] an Enumerator when no block is given
    def self.glob(s3_url, aws_s3_client, regex, &block)
      new(s3_url, aws_s3_client).glob(regex, &block)
    end

    # Yields every object URL under the directory that matches +regex+.
    #
    # @param regex [Regexp, String] pattern each object URL is matched against
    # @yieldparam s3_file [String] matching object URL
    # @return [Enumerator, nil] an Enumerator when no block is given
    def glob(regex)
      return enum_for(:glob, regex) unless block_given?

      each do |s3_file|
        yield s3_file if s3_file.match?(regex)
      end
    end

    # Yields the s3:// URL of every object whose key starts with the prefix
    # encoded in +s3_url+, fetching further pages until the listing is done.
    #
    # @yieldparam s3_file [String] object URL with each path segment escaped
    # @return [Enumerator, nil] an Enumerator when no block is given
    def each
      return enum_for(:each) unless block_given?

      uri = URI(s3_url)
      request = {
        bucket: uri.host,
        # Drop the leading "/" of the URL path; a bare bucket URL has no path.
        prefix: CGI.unescape(uri.path[1..-1] || ''),
        max_keys: MAX_KEYS
      }

      loop do
        resp = aws_s3_client.list_objects(request)
        resp.contents.each do |content|
          yield "s3://#{uri.host}/#{escape_path(content.key)}"
        end

        # Follow the truncation flag rather than the page size: S3 may return
        # fewer than max_keys entries on a page that is still truncated.
        break if resp.contents.empty? || !resp.is_truncated

        # next_marker is only populated when a delimiter was requested, so
        # fall back to the last key of the page as the resume point.
        request = request.merge(marker: resp.next_marker || resp.contents.last.key)
      end
    end

    # CGI-escapes each "/"-separated segment of an object key while keeping
    # the separators themselves intact.
    #
    # @param s3_path [String] raw object key
    # @return [String] escaped key, safe to embed in an s3:// URL
    def escape_path(s3_path)
      # The -1 limit keeps trailing empty segments, so a key ending in "/"
      # (a folder marker) keeps its trailing slash.
      s3_path.split('/', -1).map { |part| CGI.escape(part) }.join('/')
    end
  end
end
Version data entries
4 entries across 4 versions & 1 rubygems
Version | Path |
---|---|
s3grep-0.1.5 | lib/s3grep/directory.rb |
s3grep-0.1.4 | lib/s3grep/directory.rb |
s3grep-0.1.3 | lib/s3grep/directory.rb |
s3grep-0.1.2 | lib/s3grep/directory.rb |