lib/cloud_crowd/action.rb in documentcloud-cloud-crowd-0.2.0 vs lib/cloud_crowd/action.rb in documentcloud-cloud-crowd-0.2.1
- old
+ new
@@ -10,10 +10,13 @@
# to be mapped into WorkUnits and processed in parallel. In the +merge+ step,
# +input+ will be an array of all the resulting outputs from calling process.
#
# All actions have use of an individual +work_directory+, for scratch files,
# and spend their duration inside of it, so relative paths work well.
+ #
+ # Note that Actions inherit a backticks (`) method that raises an Exception
+ # if the external command fails.
class Action
FILE_URL = /\Afile:\/\//
attr_reader :input, :input_path, :file_name, :options, :work_directory
@@ -31,11 +34,11 @@
status == MERGING ? parse_input : download_input
end
# Each Action subclass must implement a +process+ method, overriding this.
def process
- raise NotImplementedError.new("CloudCrowd::Actions must override 'process' with their own processing code.")
+ raise NotImplementedError, "CloudCrowd::Actions must override 'process' with their own processing code."
end
# Download a file to the specified path.
def download(url, path)
`curl -s "#{url}" > "#{path}"`
@@ -64,22 +67,31 @@
# to the root directory (where workers run by default).
def cleanup_work_directory
  # Recursively delete this Action's scratch directory, but only when it is
  # actually present on disk, so repeated cleanups are harmless.
  # File.exist? is used because the File.exists? alias was deprecated and
  # then removed in Ruby 3.2.
  FileUtils.rm_r(@work_directory) if File.exist?(@work_directory)
end
+ # Actions have a backticks command that raises a CommandFailed exception
+ # on failure, so that processing doesn't just blithely continue.
+ def `(command)
+ result = super(command)
+ exit_code = $?.to_i
+ raise Error::CommandFailed.new(result, exit_code) unless exit_code == 0
+ result
+ end
+
private
# Convert an unsafe URL into a filesystem-friendly filename.
# The last path segment of +url+ is percent-decoded, every character outside
# of [a-zA-Z0-9_.-] is replaced with a dash, runs of dashes are collapsed into
# one, and any dots in the stem become dashes before the original extension
# is appended again. Returns the sanitized filename as a String.
def safe_filename(url)
  ext = File.extname(url)
  # URI.unescape was removed in Ruby 3.0; the RFC 2396 parser performs the
  # same percent-decoding.
  decoded = URI::RFC2396_Parser.new.unescape(File.basename(url))
  name = decoded.gsub(/[^a-zA-Z0-9_\-.]/, '-').gsub(/-+/, '-')
  File.basename(name, ext).gsub('.', '-') + ext
end
# The directory prefix to use for both local and S3 storage.
- # [action_name]/job_[job_id]/unit_[work_unit_it]
+ # [action]/job_[job_id]/unit_[work_unit_id]
def storage_prefix
path_parts = []
path_parts << Inflector.underscore(self.class)
path_parts << "job_#{@job_id}"
path_parts << "unit_#{@work_unit_id}" if @work_unit_id
@@ -91,12 +103,12 @@
@input = JSON.parse(@input)
end
# If the input is a URL, download the file before beginning processing.
def download_input
+ input_is_url = !!URI.parse(@input) rescue false
+ return unless input_is_url
Dir.chdir(@work_directory) do
- input_is_url = !!URI.parse(@input) rescue false
- return unless input_is_url
@input_path = File.join(@work_directory, safe_filename(@input))
@file_name = File.basename(@input_path, File.extname(@input_path))
download(@input, @input_path)
end
end
\ No newline at end of file