Sha256: f48a8d62f4ed0feb5aa8f246e7d1946f17363f9111275ebf37e2e8fea5d539d2
Contents?: true
Size: 1.44 KB
Versions: 1
Compression:
Stored size: 1.44 KB
Contents
#!/usr/bin/env ruby
# frozen_string_literal: true

# Command-line entry point for just_crawl.
#
# Parses the command-line switches, takes the first remaining argument as the
# domain to crawl, builds a JustCrawl::Engine from the collected options, runs
# it and prints its summary. The process exits with a non-zero status when no
# domain is given, when the run is interrupted with Ctrl-C, or when the crawler
# reports errors or found no links.

require 'optparse'
require_relative '../lib/just_crawl.rb'

# Default pool size; may be overridden below with -c/--connections.
# NOTE(review): EM is presumably EventMachine, loaded by lib/just_crawl — confirm.
EM.threadpool_size = 5

options = {}

# OptionParser#parse! strips the recognised switches from ARGV and returns what
# is left, so this holds the positional arguments, not the parser itself.
remaining_args = OptionParser.new do |opts|
  opts.banner = "JustCrawl pages within a domain, reporting any page that returns a bad response code\nUsage: just_crawl [options] domain"

  opts.on('-s', '--start /home,/about', Array, 'Starting path(s), defaults to /') do |paths|
    options[:start] = paths
  end
  opts.on('-u', '--username username', String, 'Basic auth username') do |username|
    options[:username] = username
  end
  opts.on('-p', '--password password', String, 'Basic auth password') do |password|
    options[:password] = password
  end
  # The default shown in the help text is interpolated once, while the option is
  # being defined, so it reflects the value assigned above.
  opts.on('-c', '--connections count', Integer,
          "Max number of parallel connections to use. The default is #{EM.threadpool_size}.") do |count|
    EM.threadpool_size = count
  end
  # $verbose is a global, so it is visible outside this script.
  # NOTE(review): presumably read by the library code — confirm before renaming.
  opts.on('-v', '--verbose', 'Give details when crawling') { |flag| $verbose = flag }

  opts.on_tail('-h', '--help', 'Show this message') do
    puts opts
    exit
  end
  # Uses -V rather than -v: both --verbose and --version used to claim -v, so
  # the short switch could only ever reach one of them.
  opts.on_tail('-V', '--version', 'Print version') do
    puts JustCrawl::VERSION
    exit
  end
end.parse!

# The first positional argument is the domain to crawl.
options[:domain] = remaining_args.first

unless options[:domain]
  puts 'Must provide a domain'
  exit(-1)
end

# Assume plain HTTP when the caller did not spell out a scheme.
options[:domain] = "http://#{options[:domain]}" unless options[:domain].include?('://')

crawler = JustCrawl::Engine.new(options)

# On Ctrl-C, still print the summary for whatever was crawled so far, then
# exit with a failure status.
trap('SIGINT') do
  puts "\n\nAborting just_crawl.."
  crawler.summarize
  exit(-1)
end

crawler.run
crawler.summarize

# Signal failure to the calling shell when the crawl was not clean.
exit(-1) if crawler.errors? || crawler.no_links_found?
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
just_crawl-1.1.9 | bin/just_crawl |