Sha256: 3e9d11fdd436b41c49a58deae232176ef6947ad26308b6332e514ed9e177879a

Contents?: true

Size: 1.9 KB

Versions: 1

Compression:

Stored size: 1.9 KB

Contents

#!/usr/bin/env ruby
require 'krawler'
require 'optparse'

# Parsed command-line flags. Each value is stored under its short option
# letter (:nc for --no-cache); only flags actually given appear in the hash.
options = {}
optparse = OptionParser.new do |opts|
  opts.banner = 'Usage: krawl [base url] [options]'

  opts.separator ''
  opts.separator 'Specific options:'

  opts.on('-e', '--exclude regex', 'Exclude matching paths') do |e|
    options[:e] = e
  end

  opts.on('-i', '--include regex', 'Include matching paths regardless of sub path restriction') do |i|
    options[:i] = i
  end

  opts.on('-s', '--sub-restrict', 'Restrict to sub paths of base url', 'Default: false') do
    options[:s] = true
  end

  # Must be stored under :d (not :s): :s is the sub-path restriction flag.
  opts.on('-d', '--ignore-domain', 'Ignore domain restrictions', 'Default: false') do
    options[:d] = true
  end

  # The Integer acceptor makes OptionParser convert the count and reject
  # non-numeric input, so options[:c] is always a number when present.
  opts.on('-c', '--concurrent count', Integer,
          'Crawl with count number of concurrent connections', 'Default: 4') do |c|
    options[:c] = c
  end

  opts.on('-r', '--randomize', 'Randomize crawl path', 'Default: true') do |r|
    options[:r] = r
  end

  opts.on('-n', '--no-cache', 'Send Cache-Control: no-cache header', 'Default: false') do
    options[:nc] = true
  end

  # The three login options take an optional argument; the stored value is
  # nil when the flag is given without one.
  opts.on('-l[login_url]', '--login_url[=login_url]', 'Login URL') do |login_url|
    options[:l] = login_url
  end

  opts.on('-u[username]', '--username[=username]', 'Username') do |username|
    options[:u] = username
  end

  opts.on('-p[password]', '--password[=password]', 'Password') do |password|
    options[:p] = password
  end

  opts.separator ''

  opts.on('-h', '--help', 'Show this help message.') do
    puts opts
    exit
  end
end
# Destructively parse ARGV: recognised flags are removed, leaving only the
# positional arguments (the base url) behind.
optparse.parse!

# Require a first positional argument that starts with "http"; otherwise
# print the usage text and exit with a failure status.
unless ARGV.first.to_s =~ /^http/
  puts optparse
  exit(-1)
end

# Translate the short flag keys into the option names handed to
# Krawler::Base, then build the crawler and invoke its `base` method.
crawl_settings = {
  exclude: options[:e],
  include: options[:i],
  restrict: options[:s],
  threads: options[:c],
  randomize: options[:r],
  no_cache: options[:nc],
  username: options[:u],
  password: options[:p],
  login_url: options[:l],
  domain: options[:d]
}
# The localhost fallback only applies when no positional argument is left.
start_url = ARGV[0] || 'http://localhost:3000/'
Krawler::Base.new(start_url, crawl_settings).base

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
krawler-1.0.12 bin/krawl