require 'imw/resource'
module IMW
module Tools
# Packages an Array of input files into a single output archive.
# When the archive is extracted, all the input files given will be
# in a single directory with a chosen name. The path to the output
# archive determines both the name of the archive and its type (tar,
# tar.bz2, zip, &c.).
#
# If any of the input files are themselves archives, they will first
# be extracted, with only their contents winding up in the final
# directory (the file hierarchy of the archive will be preserved).
# If any of the input files are compressed, they will first be
# uncompressed before being added to the directory.
#
# Both local and remote files can be archived. An example:
#
# archiver = IMW::Tools::Archiver.new 'my_archive', ['/path/to/my/regular_file.tsv', '/path/to/an/archive.tar.bz2', '/path/to/my_compressed_file.gz', 'http://mywebsite.com/index.html']
# archiver.package! '/path/to/my_archive.zip'
#
# This will create a ZIP archive at
# /path/to/my_archive.zip. When the ZIP archive is
# extracted its contents will look like
#
# my_archive
# |-- regular_file.tsv
# |-- archive_file1
# |-- archive_dir
# | |-- archive_file2
# | `-- archive_file3
# |-- archive_file3
# |-- my_compressed_file
# `-- index.html
#
# Notice that
#
# - the name of the extracted directory is given by the first
# argument to the Archiver when it was instantiated.
#
# - all files wind up in the top-level of this extracted directory
# when possible (regular_file.tsv, index.html)
#
# - /path/to/archive.tar.bz2 was not directly included, but its
# contents (archive_file1,
# archive_dir/archive_file2,
# archive_dir/archive_file3) were included instead.
#
# - /path/to/my_compressed_file.gz was first uncompressed before
# being added to the archive.
#
# - the remote file http://mywebsite.com/index.html was
# downloaded and included
#
# This process can take a while when the constituent files are
# large because there is quite a lot of preparation done to the
# files to make this nice output structure in the final archive.
# Further calls to package! on the same instance of
# Archiver will skip the preparation step (the intermediate
# results of which are sitting in IMW's temporary directory) and
# directly create the package, saving time when attempting to
# create multiple package formats from the same input data.
class Archiver
  attr_accessor :name, :local_inputs, :remote_inputs

  # Create an archiver for a named bundle of inputs.
  #
  # The inputs may be given either as a single collection
  # (<tt>new(name, [a, b])</tt>) or as a plain argument list
  # (<tt>new(name, a, b)</tt>).
  #
  # @param [String] name the name of the directory the archive will extract to
  # @param [Array<String, IMW::Resource>] raw_inputs paths/URIs or resources to archive
  def initialize name, *raw_inputs
    @name = name
    # A lone trailing argument is treated as the collection itself so
    # that the two-argument calling convention keeps working unchanged.
    self.inputs = raw_inputs.length == 1 ? raw_inputs.first : raw_inputs
  end

  # Set the inputs for this archiver, splitting them into
  # +local_inputs+ and +remote_inputs+.
  #
  # Local directories are replaced by whatever their +resources+
  # method returns; everything else is kept as a single resource.
  #
  # @param [Array<String, IMW::Resource>, String, IMW::Resource] new_inputs the inputs to archive, local or remote
  def inputs= new_inputs
    # Accept a single path or resource as well as a collection of them.
    new_inputs = [new_inputs] if new_inputs.is_a?(String) || new_inputs.is_a?(IMW::Resource)
    @local_inputs, @remote_inputs = [], []
    new_inputs.each do |obj|
      input = obj.is_a?(IMW::Resource) ? obj : IMW.open(obj) # take either paths/URIs or IMW::Resource objects
      if input.is_local?
        @local_inputs << (input.directory? ? input.resources : input) # recurse through directories
      else
        @remote_inputs << input
      end
    end
    # Directories contributed nested arrays of resources above.
    @local_inputs.flatten!
  end

  # Return a list of error messages for this archiver.
  #
  # @return [Array] the error messages
  def errors
    @errors ||= []
  end

  # Was this archiver successful (did it not have any errors)?
  #
  # @return [true, false]
  def success?
    errors.empty?
  end

  # A temporary directory to work in.  Its contents will
  # ultimately consist of a directory named for the package
  # containing all the input files.
  #
  # The directory name combines the current time, the process ID
  # and this object's ID so that two archivers created within the
  # same second in the same process do not share (and later
  # delete) each other's working files.  The value is memoized.
  #
  # @return [String]
  def tmp_dir
    @tmp_dir ||= File.join(IMW.path_to(:tmp_root, 'packager'), "#{Time.now.to_i}-#{$$}-#{object_id}")
  end

  # A directory which will contain all the content being packaged,
  # including the contents of any archives that were included in
  # the list of files to process.
  #
  # @return [String]
  def dir
    @dir ||= File.join(tmp_dir, name.to_s)
  end

  # Remove the +tmp_dir+ entirely, getting rid of all temporary
  # files.
  def clean!
    FileUtils.rm_rf(tmp_dir)
  end

  # Copy, decompress, or extract the input paths to the temporary
  # directory, readying them for packaging.
  def prepare!
    FileUtils.mkdir_p dir unless File.exist?(dir)
    local_inputs.each do |existing_file|
      new_path = File.join(dir, existing_file.basename)
      if existing_file.is_archive?
        # Extract from inside +dir+ so the archive's contents land there.
        FileUtils.cd(dir) { existing_file.extract }
      elsif existing_file.is_compressed?
        existing_file.cp(new_path).decompress!
      else
        existing_file.cp(new_path)
      end
    end
    remote_inputs.each do |remote_input|
      remote_input.cp(File.join(dir, remote_input.effective_basename))
    end
  end

  # Checks to see if all expected files exist in the temporary
  # directory for this packager.
  #
  # @return [true, false]
  def prepared?
    locals_ready = local_inputs.all? do |existing_file|
      if existing_file.is_archive?
        existing_file.contents.all? { |archived_file_path| File.exist?(File.join(dir, archived_file_path)) }
      elsif existing_file.is_compressed?
        File.exist?(File.join(dir, existing_file.decompressed_basename))
      else
        File.exist?(File.join(dir, existing_file.basename))
      end
    end
    locals_ready && remote_inputs.all? do |remote_input|
      File.exist?(File.join(dir, remote_input.effective_basename))
    end
  end

  # Package the contents of the temporary directory to an archive
  # at +output+ but return exceptions instead of raising them.
  #
  # @param [String, IMW::Resource] output the path to the output package
  # @param [Hash] options passed through unchanged to +package!+
  # @return [StandardError, IMW::Resource] either the completed package or the error which was raised
  def package output, options={}
    package! output, options
  rescue => e
    e
  end

  # Package the contents of the temporary directory to an archive
  # at +output+.  The extension of +output+ determines the kind of
  # archive.
  #
  # Runs +prepare!+ first unless +prepared?+ is already true, and
  # replaces any file already present at +output+.  If the archive
  # is missing afterwards a message is added to +errors+.
  #
  # @param [String, IMW::Resource] output the path to the output package
  # @param [Hash] options currently unused by this method
  # @return [IMW::Resource] the completed package
  def package! output, options={}
    prepare! unless prepared?
    output = IMW.open(output)
    FileUtils.mkdir_p(output.dirname) unless File.exist?(output.dirname)
    output.rm! if output.exist?
    # Build the archive from inside +tmp_dir+ so that its entries are
    # relative paths beginning with +name+, then move it into place.
    FileUtils.cd(tmp_dir) { IMW.open(output.basename).create(*Dir["#{name}/**/*"]).mv(output.path) }
    add_processing_error "Archiver: couldn't create archive #{output.path}" unless output.exist?
    output
  end

  protected

  # Log +error+ as a warning and record it in +errors+.
  def add_processing_error error # :nodoc:
    IMW.logger.warn error
    errors << error
  end
end
end
end