lib/pupa/runner.rb in pupa-0.0.13 vs lib/pupa/runner.rb in pupa-0.1.0
- old
+ new
@@ -1,10 +1,8 @@
require 'optparse'
require 'ostruct'
-require 'moped'
-
module Pupa
class Runner
attr_reader :options, :actions
# @param [Pupa::Processor] a processor class
@@ -14,18 +12,17 @@
@options = OpenStruct.new({
actions: [],
tasks: [],
output_dir: File.expand_path('scraped_data', Dir.pwd),
+ pipelined: false,
cache_dir: File.expand_path('web_cache', Dir.pwd),
expires_in: 86400, # 1 day
- pipelined: false,
+ database_url: 'mongodb://localhost:27017/pupa',
validate: true,
- host_with_port: 'localhost:27017',
- database: 'pupa',
- dry_run: false,
level: 'INFO',
+ dry_run: false,
}.merge(defaults))
@actions = {
'scrape' => 'Scrapes data from online sources',
'import' => 'Imports scraped data into a database',
@@ -74,40 +71,37 @@
options.tasks << v
end
opts.on('-o', '--output_dir PATH', 'The directory or Redis address (e.g. redis://localhost:6379/0) in which to dump JSON documents') do |v|
options.output_dir = v
end
+ opts.on('--pipelined', 'Dump JSON documents all at once') do |v|
+ options.pipelined = v
+ end
opts.on('-c', '--cache_dir PATH', 'The directory or Memcached address (e.g. memcached://localhost:11211) in which to cache HTTP requests') do |v|
options.cache_dir = v
end
opts.on('-e', '--expires_in SECONDS', "The cache's expiration time in seconds") do |v|
options.expires_in = v
end
- opts.on('--pipelined', 'Dump JSON documents all at once') do |v|
- options.pipelined = v
+ opts.on('-d', '--database_url SCHEME://USERNAME:PASSWORD@HOST:PORT/DATABASE', 'The database URL') do |v|
+ options.database_url = v
end
opts.on('--[no-]validate', 'Validate JSON documents') do |v|
options.validate = v
end
- opts.on('-H', '--host HOST:PORT', 'The host and port to MongoDB') do |v|
- options.host_with_port = v
- end
- opts.on('-d', '--database NAME', 'The name of the MongoDB database') do |v|
- options.database = v
- end
- opts.on('-n', '--dry-run', 'Show the plan without running any actions') do
- options.dry_run = true
- end
opts.on('-v', '--verbose', 'Show all messages') do
options.level = 'DEBUG'
end
opts.on('-q', '--quiet', 'Show only warning and error messages') do
options.level = 'WARN'
end
opts.on('-s', '--silent', 'Show no messages') do
options.level = 'UNKNOWN'
end
+ opts.on('-n', '--dry-run', 'Show the plan without running any actions') do
+ options.dry_run = true
+ end
opts.separator ''
opts.separator 'Common options:'
opts.on_tail('-h', '--help', 'Show this message') do
puts opts
@@ -143,13 +137,14 @@
if options.tasks.empty?
options.tasks = @processor_class.tasks
end
processor = @processor_class.new(options.output_dir,
+ pipelined: options.pipelined,
cache_dir: options.cache_dir,
expires_in: options.expires_in,
- pipelined: options.pipelined,
+ database_url: options.database_url,
validate: options.validate,
level: options.level,
options: Hash[*rest])
options.actions.each do |action|
@@ -163,11 +158,11 @@
puts "actions: #{options.actions.join(', ')}"
puts "tasks: #{options.tasks.join(', ')}"
end
if options.level == 'DEBUG'
- %w(output_dir cache_dir expires_in host_with_port database level).each do |option|
+ %w(output_dir pipelined cache_dir expires_in database_url validate level).each do |option|
puts "#{option}: #{options[option]}"
end
unless rest.empty?
puts "options: #{rest.join(' ')}"
end
@@ -181,11 +176,9 @@
options: Marshal.load(Marshal.dump(options)).to_h,
arguments: rest,
},
start: Time.now.utc,
}
-
- Pupa.session = Moped::Session.new([options.host_with_port], database: options.database)
if options.actions.delete('scrape')
processor.store.clear
report[:scrape] = {}
options.tasks.each do |task_name|