action.rb in documentcloud-cloud-crowd-0.2.1

- old
+ new

@@ -10,10 +10,13 @@
   # to be mapped into WorkUnits and processed in parallel. In the +merge+ step,
   # +input+ will be an array of all the resulting outputs from calling process.
   #
   # All actions have use of an individual +work_directory+, for scratch files,
   # and spend their duration inside of it, so relative paths work well.
+  #
+  # Note that Actions inherit a backticks (`) method that raises an Exception
+  # if the external command fails.
   class Action
     
     # Matches strings that begin with the literal prefix "file://"
     # (\A anchors the match at the very start of the string).
     FILE_URL = /\Afile:\/\//
     
     # Reader methods only; no writers are defined for these attributes here.
     attr_reader :input, :input_path, :file_name, :options, :work_directory
@@ -31,11 +34,11 @@
       status == MERGING ? parse_input : download_input
     end
     
     # Each Action subclass must implement a +process+ method, overriding this.
     def process
       # Default implementation: unconditionally raises. The removed (-) and
       # added (+) lines raise the same NotImplementedError with the same message;
       # only the call form changes, to the `raise Class, message` idiom.
       # Note: NotImplementedError descends from ScriptError, not StandardError,
       # so a bare `rescue` will not catch it.
-      raise NotImplementedError.new("CloudCrowd::Actions must override 'process' with their own processing code.")
+      raise NotImplementedError, "CloudCrowd::Actions must override 'process' with their own processing code."
     end
     
     # Download a file to the specified path.
     def download(url, path)
       `curl -s "#{url}" > "#{path}"`
@@ -64,22 +67,31 @@
     # to the root directory (where workers run by default).
     def cleanup_work_directory
       # Recursively delete the scratch directory held in @work_directory.
       # The existence check makes a repeated call (or a call after the directory
       # was already removed) a no-op instead of an error from FileUtils.rm_r.
       # File.exist? replaces File.exists?, which was deprecated and then removed
       # in Ruby 3.2.
       FileUtils.rm_r(@work_directory) if File.exist?(@work_directory)
     end
     
+    # Actions have a backticks command that raises a CommandFailed exception 
+    # on failure, so that processing doesn't just blithely continue.
+    def `(command)
+      # Delegate to the inherited backtick method to actually run the command
+      # and capture what it returns.
+      result    = super(command)
+      # $? is the status of the child process that just finished.
+      # NOTE(review): Process::Status#to_i is the raw status word, not the exit
+      # code itself (#exitstatus); on typical POSIX systems `exit 1` yields 256.
+      # The zero check below is unaffected, but the value handed to
+      # CommandFailed may not be what its name suggests -- confirm intended.
+      exit_code = $?.to_i
+      raise Error::CommandFailed.new(result, exit_code) unless exit_code == 0
+      # On success, behave like ordinary backticks and return the output.
+      result
+    end
     
+    
     private
     
     # Convert an unsafe URL into a filesystem-friendly filename.
     def safe_filename(url)
       # url - String URL (or path); only the segment after the last "/" is used.
       # Returns the sanitized file stem joined with the URL's extension.
       #
       # Drop any query string first. Otherwise File.extname returns something
       # like ".pdf?x=1", which no longer matches the sanitized name and is
       # appended to the result raw, defeating the sanitization.
       url  = url.sub(/\?.*\z/m, '')
       ext  = File.extname(url)
       # URI.unescape was removed in Ruby 3.0; the default parser's #unescape
       # performs the same percent-decoding. Every character outside
       # [a-zA-Z0-9_.-] then becomes a dash, and runs of dashes are collapsed.
       name = URI::DEFAULT_PARSER.unescape(File.basename(url)).gsub(/[^a-zA-Z0-9_\-.]/, '-').gsub(/-+/, '-')
       # Dots inside the stem become dashes so the extension's dot is the only one.
       File.basename(name, ext).gsub('.', '-') + ext
     end
     
     # The directory prefix to use for both local and S3 storage.
-    # [action_name]/job_[job_id]/unit_[work_unit_it]
+    # [action]/job_[job_id]/unit_[work_unit_id]
     def storage_prefix
       path_parts = []
       path_parts << Inflector.underscore(self.class)
       path_parts << "job_#{@job_id}"
       path_parts << "unit_#{@work_unit_id}" if @work_unit_id
@@ -91,12 +103,12 @@
       @input = JSON.parse(@input)
     end
     
     # If the input is a URL, download the file before beginning processing.
     def download_input
       # "Is a URL" here means only that URI.parse accepts @input without raising:
       # the `rescue false` modifier turns an error from the parse into false, and
       # the method then returns early without downloading anything.
       # This hunk moves that check (and its early return) from inside the
       # Dir.chdir block to before it.
+      input_is_url = !!URI.parse(@input) rescue false
+      return unless input_is_url
       Dir.chdir(@work_directory) do
-        input_is_url = !!URI.parse(@input) rescue false
-        return unless input_is_url
         # Local destination: the sanitized file name placed inside the work directory.
         @input_path = File.join(@work_directory, safe_filename(@input))
         # Base name of the local file with its extension stripped.
         @file_name = File.basename(@input_path, File.extname(@input_path))
         download(@input, @input_path)
       end
     end
\ No newline at end of file