lib/cloud_crowd/action.rb in documentcloud-cloud-crowd-0.2.0 vs lib/cloud_crowd/action.rb in documentcloud-cloud-crowd-0.2.1

- old
+ new

@@ -10,10 +10,13 @@ # to be mapped into WorkUnits and processed in parallel. In the +merge+ step, # +input+ will be an array of all the resulting outputs from calling process. # # All actions have use of an individual +work_directory+, for scratch files, # and spend their duration inside of it, so relative paths work well. + # + # Note that Actions inherit a backticks (`) method that raises an Exception + # if the external command fails. class Action FILE_URL = /\Afile:\/\// attr_reader :input, :input_path, :file_name, :options, :work_directory @@ -31,11 +34,11 @@ status == MERGING ? parse_input : download_input end # Each Action subclass must implement a +process+ method, overriding this. def process - raise NotImplementedError.new("CloudCrowd::Actions must override 'process' with their own processing code.") + raise NotImplementedError, "CloudCrowd::Actions must override 'process' with their own processing code." end # Download a file to the specified path. def download(url, path) `curl -s "#{url}" > "#{path}"` @@ -64,22 +67,31 @@ # to the root directory (where workers run by default). def cleanup_work_directory FileUtils.rm_r(@work_directory) if File.exists?(@work_directory) end + # Actions have a backticks command that raises a CommandFailed exception + # on failure, so that processing doesn't just blithely continue. + def `(command) + result = super(command) + exit_code = $?.to_i + raise Error::CommandFailed.new(result, exit_code) unless exit_code == 0 + result + end + private # Convert an unsafe URL into a filesystem-friendly filename. def safe_filename(url) ext = File.extname(url) name = URI.unescape(File.basename(url)).gsub(/[^a-zA-Z0-9_\-.]/, '-').gsub(/-+/, '-') File.basename(name, ext).gsub('.', '-') + ext end # The directory prefix to use for both local and S3 storage. 
- # [action_name]/job_[job_id]/unit_[work_unit_id] + # [action]/job_[job_id]/unit_[work_unit_id] def storage_prefix path_parts = [] path_parts << Inflector.underscore(self.class) path_parts << "job_#{@job_id}" path_parts << "unit_#{@work_unit_id}" if @work_unit_id @@ -91,12 +103,12 @@ @input = JSON.parse(@input) end # If the input is a URL, download the file before beginning processing. def download_input + input_is_url = !!URI.parse(@input) rescue false + return unless input_is_url Dir.chdir(@work_directory) do - input_is_url = !!URI.parse(@input) rescue false - return unless input_is_url @input_path = File.join(@work_directory, safe_filename(@input)) @file_name = File.basename(@input_path, File.extname(@input_path)) download(@input, @input_path) end end \ No newline at end of file