#! /usr/bin/env ruby
#
# Command-line entry point for RubyRetriever ("rr").
#
# Parses the command-line flags into an options hash, checks that at least one
# target URL and one mode flag (-s / -f) were supplied, and then, for every
# remaining argument, constructs the Retriever object(s) that match the
# selected mode(s).
#
# Usage: rr [MODE FLAG] [options] Target_URL

# OptionParser lives in the standard library but is not loaded by default;
# require it explicitly instead of relying on 'retriever' to pull it in.
require 'optparse'
require 'retriever'

options = {}
optparse = OptionParser.new do |opts|
  # Set a banner, displayed at the top of the help screen.
  opts.banner = "Usage: rr [MODE FLAG] [options] Target_URL"

  # Mode flags: each stores its argument, or stays false when not given.
  options[:sitemap] = false
  opts.on('-s', '--sitemap FORMAT', 'MODE FLAG: Sitemap mode - Crawl site and output sitemap, format choices: CSV or XML') do |output_type|
    options[:sitemap] = output_type
  end

  options[:fileharvest] = false
  opts.on('-f', '--files FILETYPE', 'MODE FLAG: Fileharvest mode - Crawl site and collect links for files found, extension for filetype') do |file_ext|
    options[:fileharvest] = file_ext
  end

  # General options.
  options[:filename] = nil
  opts.on('-o', '--out FILENAME', 'Dump output to selected filename') do |filename|
    options[:filename] = filename
  end

  options[:verbose] = false
  opts.on('-v', '--verbose', 'Output more information') do
    options[:verbose] = true
  end

  options[:progress] = false
  opts.on('-p', '--progress', 'Output progress bar') do
    options[:progress] = true
  end

  # NOTE(review): the limit is handed on exactly as typed (a String);
  # presumably Retriever converts it -- confirm against the library.
  options[:maxpages] = false
  opts.on('-l', '--limit PAGE_LIMIT_#', 'set a max on the total number of crawled pages') do |maxpages|
    options[:maxpages] = maxpages
  end

  options[:autodown] = false
  opts.on('-a', '--auto', 'Automatically download all files of filetype located') do
    options[:autodown] = true
  end

  # This displays the help screen, all programs are assumed to have this option.
  opts.on('-h', '--help', 'Display this screen') do
    puts opts
    exit
  end
end

# parse! strips the recognised flags from ARGV, leaving only the target URLs.
# An unknown flag or a missing flag argument raises an OptionParser::ParseError
# subclass; report it with the help text rather than a stack trace.
begin
  optparse.parse!
rescue OptionParser::ParseError => e
  abort("### #{e.message}\n#{optparse}")
end

if ARGV.empty?
  abort("###Missing Required Argument\nUsage: rr [mode] [options] Target_URL")
end

# Without a mode flag neither Retriever class below would be instantiated and
# the script would report "done" without doing anything, so fail loudly.
unless options[:sitemap] || options[:fileharvest]
  abort("###Missing Mode Flag (-s or -f)\nUsage: rr [mode] [options] Target_URL")
end

ARGV.each do |q|
  if options[:verbose]
    puts "###############################"
    puts "### [RubyRetriever]"
    puts "### Creating Sitemap" if options[:sitemap]
    puts "### Outputting in format: #{options[:sitemap]}" if options[:sitemap]
    puts "### Performing File Harvest" if options[:fileharvest]
    puts "### Searching for file extension: #{options[:fileharvest]} pages" if options[:fileharvest]
    puts "### Writting output to filename: #{options[:filename]}" if options[:filename]
    puts "### Being verbose"
    puts "### Stopping after #{options[:maxpages]} pages" if options[:maxpages]
  end
  puts "###############################"
  puts "### [RubyRetriever] go fetch #{q}"
  # Only the constructors are invoked here and nothing is called on the
  # resulting objects, so the return values are intentionally discarded.
  # When both mode flags are given, both run (file harvest first).
  Retriever::FetchFiles.new(q, options) if options[:fileharvest]
  Retriever::FetchSitemap.new(q, options) if options[:sitemap]
  puts "### [RubyRetriever] is done."
  puts "###############################"
  puts
end