Sha256: f48a8d62f4ed0feb5aa8f246e7d1946f17363f9111275ebf37e2e8fea5d539d2

Contents?: true

Size: 1.44 KB

Versions: 1

Compression:

Stored size: 1.44 KB

Contents

#!/usr/bin/env ruby
# frozen_string_literal: true

require 'optparse'
require_relative '../lib/just_crawl.rb'

# Default number of parallel connections; can be overridden with -c/--connections.
# NOTE(review): EM is presumably EventMachine, loaded via lib/just_crawl — confirm.
EM.threadpool_size = 5

# Options collected from the command line and later handed to the crawler.
options = {}
parser = OptionParser.new do |opts|
  opts.banner = "JustCrawl pages within a domain, reporting any page that returns a bad response code\nUsage: just_crawl [options] domain"
  opts.on('-s', '--start /home,/about', Array, 'Starting path(s), defaults to /') { |o| options[:start] = o }
  opts.on('-u', '--username username', String, 'Basic auth username') { |o| options[:username] = o }
  opts.on('-p', '--password password', String, 'Basic auth password') { |o| options[:password] = o }
  opts.on('-c', '--connections count', Integer, "Max number of parallel connections to use. The default is #{EM.threadpool_size}.") { |o| EM.threadpool_size = o }
  # $verbose is a global set here and not read in this script.
  # NOTE(review): presumably consumed by the library — confirm.
  opts.on('-v', '--verbose', 'Give details when crawling') { |o| $verbose = o }
  opts.on_tail('-h', '--help', 'Show this message') do
    puts opts
    exit
  end
  # The short flag is -V: -v is already taken by --verbose above, so registering
  # -v here as well made the short form of --version unreachable.
  opts.on_tail('-V', '--version', 'Print version') do
    puts JustCrawl::VERSION
    exit
  end
end

# parse! removes the recognised switches from ARGV and returns what is left,
# i.e. the positional arguments. Report bad switches as a usage error rather
# than letting the exception surface as a backtrace.
begin
  remaining = parser.parse!
rescue OptionParser::ParseError => e
  warn e.message
  warn parser.help
  exit(-1)
end

# The first positional argument is the domain to crawl.
options[:domain] = remaining.first

unless options[:domain]
  puts 'Must provide a domain'
  exit(-1)
end

# Default to plain HTTP when the domain was given without a scheme.
options[:domain] = "http://#{options[:domain]}" unless options[:domain].include?('://')

# Build the crawl engine from the options gathered above.
crawler = JustCrawl::Engine.new(options)

# On Ctrl-C: announce the abort, call summarize, then exit with a failure status.
Signal.trap('SIGINT') do
  puts "\n\nAborting just_crawl.."
  crawler.summarize
  exit(-1)
end

# Normal flow: run the crawl, then summarize.
crawler.run
crawler.summarize

# Exit with a failure status when the crawler reports errors or found no links.
crawl_failed = crawler.errors? || crawler.no_links_found?
exit(-1) if crawl_failed

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
just_crawl-1.1.9 bin/just_crawl