lib/pupa/runner.rb in pupa-0.0.8 vs lib/pupa/runner.rb in pupa-0.0.9
- old
+ new
@@ -16,10 +16,11 @@
actions: [],
tasks: [],
output_dir: File.expand_path('scraped_data', Dir.pwd),
cache_dir: File.expand_path('web_cache', Dir.pwd),
expires_in: 86400, # 1 day
+ pipelined: false,
validate: true,
host_with_port: 'localhost:27017',
database: 'pupa',
dry_run: false,
level: 'INFO',
@@ -79,10 +80,13 @@
options.cache_dir = v
end
opts.on('-e', '--expires_in SECONDS', "The cache's expiration time in seconds") do |v|
options.expires_in = v
end
+ opts.on('--pipelined', 'Dump JSON documents all at once') do |v|
+ options.pipelined = v
+ end
opts.on('--[no-]validate', 'Validate JSON documents') do |v|
options.validate = v
end
opts.on('-H', '--host HOST:PORT', 'The host and port to MongoDB') do |v|
options.host_with_port = v
@@ -141,10 +145,11 @@
end
processor = @processor_class.new(options.output_dir,
cache_dir: options.cache_dir,
expires_in: options.expires_in,
+ pipelined: options.pipelined,
validate: options.validate,
level: options.level,
options: Hash[*rest])
options.actions.each do |action|
@@ -171,11 +176,11 @@
exit if options.dry_run
report = {
plan: {
processor: @processor_class,
- arguments: options.to_h,
+ arguments: options.dup.to_h,
options: rest,
},
start: Time.now.utc,
}
@@ -196,9 +201,9 @@
end
end
report[:end] = Time.now.utc
report[:time] = report[:end] - report[:start]
- puts JSON.dump(report)
+ puts MultiJson.dump(report)
end
end
end