lib/pupa/runner.rb in pupa-0.0.13 vs lib/pupa/runner.rb in pupa-0.1.0

- old
+ new

@@ -1,10 +1,8 @@ require 'optparse' require 'ostruct' -require 'moped' - module Pupa class Runner attr_reader :options, :actions # @param [Pupa::Processor] a processor class @@ -14,18 +12,17 @@ @options = OpenStruct.new({ actions: [], tasks: [], output_dir: File.expand_path('scraped_data', Dir.pwd), + pipelined: false, cache_dir: File.expand_path('web_cache', Dir.pwd), expires_in: 86400, # 1 day - pipelined: false, + database_url: 'mongodb://localhost:27017/pupa', validate: true, - host_with_port: 'localhost:27017', - database: 'pupa', - dry_run: false, level: 'INFO', + dry_run: false, }.merge(defaults)) @actions = { 'scrape' => 'Scrapes data from online sources', 'import' => 'Imports scraped data into a database', @@ -74,40 +71,37 @@ options.tasks << v end opts.on('-o', '--output_dir PATH', 'The directory or Redis address (e.g. redis://localhost:6379/0) in which to dump JSON documents') do |v| options.output_dir = v end + opts.on('--pipelined', 'Dump JSON documents all at once') do |v| + options.pipelined = v + end opts.on('-c', '--cache_dir PATH', 'The directory or Memcached address (e.g. memcached://localhost:11211) in which to cache HTTP requests') do |v| options.cache_dir = v end opts.on('-e', '--expires_in SECONDS', "The cache's expiration time in seconds") do |v| options.expires_in = v end - opts.on('--pipelined', 'Dump JSON documents all at once') do |v| - options.pipelined = v + opts.on('-d', '--database_url SCHEME://USERNAME:PASSWORD@HOST:PORT/DATABASE', 'The database URL') do |v| + options.database_url = v end opts.on('--[no-]validate', 'Validate JSON documents') do |v| options.validate = v end - opts.on('-H', '--host HOST:PORT', 'The host and port to MongoDB') do |v| - options.host_with_port = v - end - opts.on('-d', '--database NAME', 'The name of the MongoDB database') do |v| - options.database = v - end - opts.on('-n', '--dry-run', 'Show the plan without running any actions') do - options.dry_run = true - end opts.on('-v', '--verbose', 'Show all messages') do options.level = 'DEBUG' end opts.on('-q', '--quiet', 'Show only warning and error messages') do options.level = 'WARN' end opts.on('-s', '--silent', 'Show no messages') do options.level = 'UNKNOWN' end + opts.on('-n', '--dry-run', 'Show the plan without running any actions') do + options.dry_run = true + end opts.separator '' opts.separator 'Common options:' opts.on_tail('-h', '--help', 'Show this message') do puts opts @@ -143,13 +137,14 @@ if options.tasks.empty? options.tasks = @processor_class.tasks end processor = @processor_class.new(options.output_dir, + pipelined: options.pipelined, cache_dir: options.cache_dir, expires_in: options.expires_in, - pipelined: options.pipelined, + database_url: options.database_url, validate: options.validate, level: options.level, options: Hash[*rest]) options.actions.each do |action| @@ -163,11 +158,11 @@ puts "actions: #{options.actions.join(', ')}" puts "tasks: #{options.tasks.join(', ')}" end if options.level == 'DEBUG' - %w(output_dir cache_dir expires_in host_with_port database level).each do |option| + %w(output_dir pipelined cache_dir expires_in database_url validate level).each do |option| puts "#{option}: #{options[option]}" end unless rest.empty? puts "options: #{rest.join(' ')}" end @@ -181,11 +176,9 @@ options: Marshal.load(Marshal.dump(options)).to_h, arguments: rest, }, start: Time.now.utc, } - - Pupa.session = Moped::Session.new([options.host_with_port], database: options.database) if options.actions.delete('scrape') processor.store.clear report[:scrape] = {} options.tasks.each do |task_name|