Sha256: c9f843c63b0447340b33bd831331597ffb5580929752d3f2e71d555a884414fb

Contents?: true

Size: 1.55 KB

Versions: 4

Compression:

Stored size: 1.55 KB

Contents

require 'aws-sdk-s3'
require 'cgi'

# Purpose: search through a "directory" on S3 (a bucket plus key prefix) for
# objects whose URL matches a specified pattern.
module S3Grep
  # Enumerates the objects listed under an S3 URL of the form
  # s3://bucket/prefix, using the supplied S3 client's +list_objects+ call.
  class Directory
    # Page size requested from +list_objects+ on every call. A page holding
    # exactly this many entries is taken to mean more pages may follow.
    MAX_KEYS = 1_000

    attr_reader :s3_url,
                :aws_s3_client

    # @param s3_url [String] URL whose host is the bucket and whose path is
    #   the (CGI-escaped) key prefix, e.g. "s3://bucket/some/prefix/"
    # @param aws_s3_client [#list_objects] client used to list the bucket
    def initialize(s3_url, aws_s3_client)
      @s3_url = s3_url
      @aws_s3_client = aws_s3_client
    end

    # Convenience wrapper: builds a Directory and calls #glob on it.
    #
    # @param s3_url [String] see #initialize
    # @param aws_s3_client [#list_objects] see #initialize
    # @param regex [Regexp, String] pattern handed to String#match?
    # @yieldparam s3_file [String] matching s3:// URL
    # @return [Enumerator, nil] an Enumerator when no block is given
    def self.glob(s3_url, aws_s3_client, regex, &block)
      new(s3_url, aws_s3_client).glob(regex, &block)
    end

    # Yields the URL of every listed object whose URL matches +regex+.
    #
    # @param regex [Regexp, String] pattern handed to String#match?
    # @yieldparam s3_file [String] matching s3:// URL
    # @return [Enumerator, nil] an Enumerator when no block is given
    def glob(regex)
      return enum_for(:glob, regex) unless block_given?

      each do |s3_file|
        yield s3_file if s3_file.match?(regex)
      end
    end

    # Yields an s3:// URL (bucket host plus escaped key) for every listed
    # object.
    #
    # @yieldparam s3_file [String] URL such as "s3://bucket/dir/file+name"
    # @return [Enumerator, nil] an Enumerator when no block is given
    def each
      return enum_for(:each) unless block_given?

      each_content do |content|
        yield "s3://#{uri.host}/#{escape_path(content.key)}"
      end
    end

    # Yields every entry of +contents+ from each +list_objects+ response for
    # the bucket/prefix, requesting further pages as needed.
    #
    # @yieldparam content [#key] one listing entry as returned by the client
    # @return [Enumerator, nil] an Enumerator when no block is given
    def each_content(&block)
      return enum_for(:each_content) unless block

      params = {
        bucket: uri.host,
        # The URL path is CGI-escaped (see #escape_path); strip the leading
        # "/" and undo the escaping to recover the raw key prefix.
        prefix: CGI.unescape(uri.path[1..-1] || ''),
        max_keys: MAX_KEYS
      }

      loop do
        contents = aws_s3_client.list_objects(params).contents
        contents.each(&block)

        # A short page means the listing is exhausted.
        break unless contents.size == MAX_KEYS

        # Resume after the last key seen; merge builds a fresh hash so the
        # params of the previous request are never mutated.
        params = params.merge(marker: contents.last.key)
      end
    end

    # CGI-escapes each "/"-separated segment of a key, keeping the slashes.
    #
    # @param s3_path [String] raw object key
    # @return [String] escaped key, safe to append to an s3:// URL
    def escape_path(s3_path)
      # limit -1 keeps trailing empty segments, so a key ending in "/" keeps
      # its trailing slash instead of silently turning into a different key.
      s3_path.split('/', -1).map { |part| CGI.escape(part) }.join('/')
    end

    # @return whatever ::S3Grep::DirectoryInfo.get returns for this directory
    def info
      ::S3Grep::DirectoryInfo.get(self)
    end

    private

    # Parsed form of #s3_url, memoized because s3_url has no writer.
    #
    # @return [URI::Generic]
    def uri
      @uri ||= URI(s3_url)
    end
  end
end

Version data entries

4 entries across 4 versions & 1 rubygems

Version Path
s3grep-0.1.9 lib/s3grep/directory.rb
s3grep-0.1.8 lib/s3grep/directory.rb
s3grep-0.1.7 lib/s3grep/directory.rb
s3grep-0.1.6 lib/s3grep/directory.rb