Sha256: 667dcd2adb967db5d66ac05c556c6f9a9eafee9719bfa669b6f4116f760789f4
Contents?: true
Size: 813 Bytes
Versions: 24
Compression:
Stored size: 813 Bytes
Contents
# Crawls a site starting at the URL given as the last command-line argument
# and prints the URL of every page encountered.
#
# Command line:
#   anemone url-list [options] <url>
#
# Options:
#   -r, --relative    print only the path of each URL instead of the full URL
#
# If the options cannot be parsed, or no usable URL remains after parsing,
# the usage text is printed and the script exits.

require 'anemone'
require 'optparse'
require 'ostruct'
require 'uri'

options = OpenStruct.new
options.relative = false

usage = <<-INFO
Usage:
  anemone url-list [options] <url>

Synopsis:
  Crawls a site starting at the given URL, and outputs the URL of each page
  in the domain as they are encountered.

Options:
  -r, --relative    Output relative URLs (rather than absolute)
INFO

opts = OptionParser.new
opts.on('-r', '--relative') { options.relative = true }

begin
  # Parse options BEFORE picking the URL: parse! strips recognised switches
  # out of ARGV, so a trailing "-r" can no longer be mistaken for the crawl
  # root (URI("-r") is a valid relative reference and would not raise).
  opts.parse!(ARGV)

  # Whatever is left last on the command line must be the URL to crawl.
  # URI(nil) raises ArgumentError when no URL was supplied at all.
  root = URI(ARGV.last)
rescue OptionParser::ParseError, URI::InvalidURIError, ArgumentError
  puts usage
  # Exit status 0 is kept as-is for compatibility with existing callers.
  exit(0)
end

Anemone.crawl(root, :discard_page_bodies => true) do |anemone|
  anemone.on_every_page do |page|
    puts(options.relative ? page.url.path : page.url)
  end
end
Version data entries
24 entries across 24 versions & 5 rubygems