lib/elasticrawl/job_step.rb in elasticrawl-1.0.0 vs lib/elasticrawl/job_step.rb in elasticrawl-1.1.0
- old
+ new
@@ -11,11 +11,11 @@
#
# For parse jobs optionally specifies the maximum number of Common Crawl
# data files to process before the job exits.
def job_flow_step(job_config)
jar = job_config['jar']
- max_files = self.job.max_files
+ max_files = self.job.max_files
step_args = []
step_args[0] = job_config['class']
step_args[1] = self.input_paths
step_args[2] = self.output_path
@@ -33,11 +33,13 @@
# Sets the Elastic MapReduce job flow step name based on the type of job it
# belongs to.
def set_step_name
# Evaluates to the step name for the owning job's type. The case has no
# else branch, so any other job type yields nil.
case self.job.type
when 'Elasticrawl::ParseJob'
# 1.0.0 (lines marked "-"): always produced a "Segment: ..." string; the
# segment local stayed nil when no crawl segment was present, leaving the
# part after the prefix empty.
- segment =self.crawl_segment.segment_name if self.crawl_segment.present?
- "Segment: #{segment}"
# 1.1.0 (lines marked "+"): the name is built from the segment description
# plus the job's max_files value, falling back to 'all' when max_files is
# nil. NOTE(review): the if has no else, so this branch now evaluates to nil
# when no crawl segment is present -- confirm callers tolerate a nil name.
+ if self.crawl_segment.present?
+ max_files = self.job.max_files || 'all'
+ "#{self.crawl_segment.segment_desc} Parsing: #{max_files}"
+ end
when 'Elasticrawl::CombineJob'
# input_paths is treated as a comma-separated string; each entry is
# counted as one job being combined.
paths = self.input_paths.split(',')
"Combining #{paths.count} jobs"
end
end