job_step.rb in elasticrawl-1.1.0

- old
+ new

@@ -11,11 +11,11 @@
     #
     # For parse jobs optionally specifies the maximum # of Common Crawl
     # data files to process before the job exits.
     def job_flow_step(job_config)
       jar = job_config['jar']
-      max_files = self.job.max_files 
+      max_files = self.job.max_files
 
       step_args = []
       step_args[0] = job_config['class']
       step_args[1] = self.input_paths
       step_args[2] = self.output_path
@@ -33,11 +33,13 @@
     # Sets the Elastic MapReduce job flow step name based on the type of job it
     # belongs to.
     def set_step_name
       # The value of the case expression below is the method's value; nothing
       # is assigned in this method. A job type matching neither branch yields
       # nil, since there is no else clause.
       case self.job.type
         when 'Elasticrawl::ParseJob'
-          segment =self.crawl_segment.segment_name if self.crawl_segment.present?
-          "Segment: #{segment}"
           # Added lines: the name is built only when a crawl segment is
           # present; with no segment this branch yields nil (the removed lines
           # yielded "Segment: " with an empty segment instead). 'all' is shown
           # when the job's max_files is nil or false.
+          if self.crawl_segment.present?
+            max_files = self.job.max_files || 'all'
+            "#{self.crawl_segment.segment_desc} Parsing: #{max_files}"
+          end
         when 'Elasticrawl::CombineJob'
           # input_paths is treated as a comma-separated string; each entry is
           # counted as one job in the name.
           paths = self.input_paths.split(',')
           "Combining #{paths.count} jobs"
       end
     end