#!/usr/bin/env ruby
# frozen_string_literal: true

# Command-line launcher for JustCrawl.
#
# Parses the command-line options, builds a JustCrawl::Engine from them, runs
# the crawl and prints its summary. The process exits with a non-zero status
# when no domain is given, when the options cannot be parsed, when the crawl is
# interrupted with Ctrl-C, or when the engine reports errors / no links found.

require 'optparse'
require_relative '../lib/just_crawl.rb'

# Default number of parallel connections; may be overridden with -c below.
EM.threadpool_size = 5

options = {}

parser = OptionParser.new do |opts|
  opts.banner = "JustCrawl pages within a domain, reporting any page that returns a bad response code\nUsage: just_crawl [options] domain"

  opts.on('-s', '--start /home,/about', Array, 'Starting path(s), defaults to /') { |o| options[:start] = o }
  opts.on('-u', '--username username', String, 'Basic auth username') { |o| options[:username] = o }
  opts.on('-p', '--password password', String, 'Basic auth password') { |o| options[:password] = o }
  opts.on('-c', '--connections count', Integer,
          "Max number of parallel connections to use. The default is #{EM.threadpool_size}.") do |o|
    EM.threadpool_size = o
  end
  # NOTE(review): $verbose is a global set here and never read in this script;
  # presumably the library consults it - confirm before renaming or removing.
  opts.on('-v', '--verbose', 'Give details when crawling') { |o| $verbose = o }

  opts.on_tail('-h', '--help', 'Show this message') do
    puts opts
    exit
  end
  # Uses -V because -v is already taken by --verbose; a short flag can only
  # ever resolve to a single option.
  opts.on_tail('-V', '--version', 'Print version') do
    puts JustCrawl::VERSION
    exit
  end
end

# parse! strips the recognised options out of ARGV and returns what is left,
# whose first element is taken as the domain.
begin
  remaining_args = parser.parse!
rescue OptionParser::ParseError => e
  # Report bad or incomplete options with the usage text instead of a stack
  # trace. Status 1 matches what an uncaught exception would have produced.
  warn e.message
  warn parser.help
  exit 1
end

options[:domain] = remaining_args.first

unless options[:domain]
  warn 'Must provide a domain'
  exit(-1)
end

# Default to plain http when the domain was given without a scheme.
options[:domain] = "http://#{options[:domain]}" unless options[:domain].include?('://')

crawler = JustCrawl::Engine.new(options)

# On Ctrl-C, still print whatever summary the crawler has before exiting.
trap('SIGINT') do
  puts "\n\nAborting just_crawl.."
  crawler.summarize
  exit(-1)
end

crawler.run
crawler.summarize

# Signal failure to the calling shell when the crawl hit errors or found nothing.
exit(-1) if crawler.errors? || crawler.no_links_found?