lib/elasticrawl/job_step.rb in elasticrawl-1.0.0 vs lib/elasticrawl/job_step.rb in elasticrawl-1.1.0

- old
+ new

@@ -11,11 +11,11 @@
     #
     # For parse jobs optionally specifies the maximum # of Common Crawl
     # data files to process before the job exits.
     def job_flow_step(job_config)
       jar = job_config['jar']
-      max_files = self.job.max_files
+      max_files = self.job.max_files
       step_args = []
       step_args[0] = job_config['class']
       step_args[1] = self.input_paths
       step_args[2] = self.output_path
@@ -33,11 +33,13 @@
     # Sets the Elastic MapReduce job flow step name based on the type of job it
     # belongs to.
     def set_step_name
       case self.job.type
       when 'Elasticrawl::ParseJob'
-        segment =self.crawl_segment.segment_name if self.crawl_segment.present?
-        "Segment: #{segment}"
+        if self.crawl_segment.present?
+          max_files = self.job.max_files || 'all'
+          "#{self.crawl_segment.segment_desc} Parsing: #{max_files}"
+        end
       when 'Elasticrawl::CombineJob'
         paths = self.input_paths.split(',')
         "Combining #{paths.count} jobs"
       end
     end