require "hash_keyword_args"
require "pathname"
require "set"
require "tmpdir"
module Defog
  class Proxy

    attr_reader :proxy_root
    attr_reader :persist
    attr_reader :synchronize
    attr_reader :max_cache_size
    attr_reader :fog_wrapper # :nodoc:

    # Opens a Fog cloud storage connection to map to a corresponding proxy
    # directory.  Use via, e.g.,
    #
    #    defog = Defog::Proxy.new(:provider => :AWS, :aws_access_key_id => access_key, ...)
    #
    # The <code>:provider</code> and its corresponding options must be
    # specified as per <code>Fog::Storage.new</code>.  Currently, only
    # <code>:local</code> and <code>:AWS</code> are supported.  When using
    # <code>:AWS</code>, an additional option <code>:bucket</code> must be
    # specified; all files proxied by this instance must be in a single
    # bucket.  (It's OK to create multiple Defog::Proxy instances with
    # the same access info but different buckets; they will internally
    # share a single Fog::Storage instance hence AWS connection.)
    #
    # To further restrict the remote files acted on by this proxy, you
    # can specify
    #    defog = Defog::Proxy.new(:provider => ..., :prefix => "my-prefix-string/")
    # and all keys that you pass to Defog will be prefixed with the given
    # string before being passed along to Fog.  (Notice that it's up to you to
    # have a trailing "/" in the prefix if that's what you want.)
    #
    # By default, each proxy's cache root directory is placed in a
    # reasonably safe place, under <code>Rails.root/tmp</code> if Rails is
    # defined, otherwise under <code>Dir.tmpdir</code>.  (More details:
    # within that directory, the root directory is disambiguated by
    # #provider and #location, so that multiple Defog::Proxy instances can
    # be created without collision.)
    #
    # The upshot is that if you have no special constraints you don't need
    # to worry about it.  But if you do care, you can specify the option:
    #    :proxy_root => "/root/for/this/proxy/files"
    #
    # You can specify that by default local proxy files will be persisted,
    # by specifying
    #    :persist => true
    # The persistence behavior can be overridden on a per-file basis when
    # opening or closing a proxy (see Defog::Handle#open, Defog::File#close)
    #
    # You can enable cache management by specifying a max cache size in
    # bytes, e.g.
    #    :max_cache_size => 3.gigabytes
    # See the README for discussion.  [Number#gigabytes is defined in
    # Rails' ActiveSupport core extensions]
    #
    # Normally synchronization (i.e. upload) of changes to local proxy
    # files happens synchronously on close; i.e. Defog::File#close waits
    # until the upload completes.  However, you can control synchronization
    # by specifying
    #    :synchronize => :async   # Synchronize in a separate thread, don't wait
    #    :synchronize => false    # Don't synchronize at all.  Defeats the purpose of Defog
    #    :synchronize => true     # This is the default behavior
    # The synchronization behavior can be overridden on a per-file basis
    # when opening or closing a proxy (see Defog::Handle#open,
    # Defog::File#close).  Note that this applies only to upload of changes to
    # proxy files that are opened as writeable; the download of data to
    # readable proxy files always happens synchronously.
    #
    # If you specify
    #    :logger => an-instance-of-Logger
    # (or provide a logger via #logger=), Defog will log downloads and
    # uploads using Logger#info.
    def initialize(opts={})
      opts = opts.keyword_args(:provider => :required,
                               :proxy_root => :optional,
                               :persist => :optional,
                               :synchronize => {:valid => [:async, true, false], :default => true},
                               :max_cache_size => :optional,
                               :OTHERS => :optional)

      @proxy_root = Pathname.new(opts.delete(:proxy_root)) if opts.proxy_root
      @persist = opts.delete(:persist)
      @synchronize = opts.delete(:synchronize)
      @max_cache_size = opts.delete(:max_cache_size)
      # Proxy paths currently open for access must not be evicted by cache
      # management; track them here.
      @reserved_proxy_paths = Set.new
      @fog_wrapper = FogWrapper.connect(opts)

      # Default root: Rails tmp dir if available, else the system tmpdir,
      # disambiguated by provider and location so multiple proxies coexist.
      @proxy_root ||= case
                      when defined?(Rails) then Rails.root + "tmp"
                      else Pathname.new(Dir.tmpdir)
                      end + "defog" + "#{provider}-#{location}"
    end

    def to_s
      "<#{self.class} provider=#{provider} location=#{location}>"
    end

    # Returns the provider for this proxy.  I.e., <code>:local</code> or
    # <code>:AWS</code>
    def provider
      @fog_wrapper.provider
    end

    # Returns a 'location' handle to use in the default proxy root path,
    # to disambiguate it from other proxies with the same provider.  For
    # :AWS it's the bucket name, for :Local it's derived from the local
    # root path.
    def location
      @fog_wrapper.location
    end

    # Returns the underlying Fog::Storage object for the cloud connection
    def fog_connection
      @fog_wrapper.fog_connection
    end

    # Returns the Fog directory object for the root of the cloud files
    def fog_directory
      @fog_wrapper.fog_directory
    end

    # Returns the prefix that was passed
    def prefix
      @fog_wrapper.prefix
    end

    def logger
      @fog_wrapper.logger
    end

    def logger=(log)
      @fog_wrapper.logger = log
    end

    # Proxy a remote cloud file.  Returns or yields a Defog::Handle object
    # that represents the file.
    #
    # If a <code>mode</code> is given, opens a proxy file via
    # Defog::Handle#open (passing it the mode and other options and
    # optional block), returning or yielding instead the Defog::File object.
    #
    # Thus
    #    proxy.file("key", mode, options, &block)
    # is shorthand for
    #    proxy.file("key").open(mode, options, &block)
    def file(key, mode=nil, opts={}, &block)
      handle = Handle.new(self, key)
      case
      when mode then handle.open(mode, opts, &block)
      when block then block.call(handle)
      else handle
      end
    end

    # Iterate through the cloud storage, yielding a Defog::Handle for each
    # remote file.
    #
    # If no block is given, an enumerator is returned.
    def each(&block)
      if block_given?
        @fog_wrapper.each do |key|
          yield file(key)
        end
      else
        to_enum(:each)
      end
    end

    ###############################
    # public-but-internal methods
    #

    # Records that +proxy_path+ is in use and must not be evicted by
    # #manage_cache.
    def reserve_proxy_path(proxy_path) #:nodoc:
      @reserved_proxy_paths << proxy_path
    end

    # Releases a previously reserved +proxy_path+, making it eligible for
    # cache eviction again.
    def release_proxy_path(proxy_path) #:nodoc:
      @reserved_proxy_paths.delete proxy_path
    end

    # Ensures there is room in the cache for +want_size+ bytes for
    # +proxy_path+, evicting unreserved files in LRU order if needed.
    # No-op unless a +max_cache_size+ was configured.  Raises
    # Error::CacheFull if enough space cannot be freed.
    def manage_cache(want_size, proxy_path) #:nodoc:
      return if max_cache_size.nil?
      return if want_size.nil?
      return if want_size <= 0

      # find available space (not counting current proxy)
      available = max_cache_size
      proxy_root.find { |path|
        available -= begin
                       path.size
                     rescue Errno::ENOENT
                       # some other process has snuck in and deleted the
                       # file since the path.file? check happened...
                       0
                     end if path.file? and path != proxy_path
      }
      return if available >= want_size
      space_needed = want_size - available

      # find all paths in the cache that aren't currently open (not
      # counting current proxy)
      candidates = []
      proxy_root.find { |path| candidates << path if path.file? and not @reserved_proxy_paths.include?(path) and path != proxy_path}

      # take candidates in LRU order until that would be enough space
      would_free = 0
      candidates = Set.new(candidates.sort_by(&:atime).take_while{|path| (would_free < space_needed).tap{|condition| would_free += path.size}})

      # still not enough...?
      raise Error::CacheFull, "No room in cache for #{proxy_path.relative_path_from(proxy_root)}: size=#{want_size} available=#{available} can_free=#{would_free} (max_cache_size=#{max_cache_size})" if would_free < space_needed

      # LRU order may have taken more than needed, if last file was a big
      # chunk.  So take another pass, eliminating files that aren't needed.
      # Do this in reverse size order, since we want to keep big files in
      # the cache if possible since they're most expensive to replace.
      candidates.sort_by(&:size).reverse.each do |path|
        if (would_free - path.size) > space_needed
          candidates.delete path
          would_free -= path.size
        end
      end

      # free the remaining candidates
      candidates.each do |candidate|
        begin
          candidate.unlink
        rescue Errno::ENOENT
          # some other process has deleted the file while we were looking at it.
          # nothing to do.
        end
      end
    end

  end
end