Sha256: cd027d3a33c595a49c78cf2f43d28a0cb1ed03f247526681687d8caf9bd3449c

Contents?: true

Size: 975 Bytes

Versions: 2

Compression:

Stored size: 975 Bytes

Contents

#! /usr/bin/env ruby
# == Synopsis
#   Crawls a site starting at the given URL, and outputs the URL of each page
#   in the domain as they are encountered.
#
# == Usage
#   anemone_url_list.rb [options] url
#
# == Options
#   -r, --relative          Output relative URLs (rather than absolute)
#
# == Author
#   Chris Kite

$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'anemone'
require 'optparse'
require 'rdoc/usage'
require 'ostruct'

# Script body: parse the command line, validate the start URL, then crawl
# from that URL and print the URL of every page as it is encountered.

options = OpenStruct.new
options.relative = false

# Parse command-line options first. parse! removes recognised switches from
# ARGV, so afterwards only positional arguments remain and a flag such as
# "-r" can no longer be mistaken for the URL to crawl.
opts = OptionParser.new
opts.on('-r', '--relative') { options.relative = true }
begin
  opts.parse!(ARGV)
rescue OptionParser::ParseError
  # Unknown or malformed switch: show the usage text from the header comment.
  # An integer first argument to RDoc::usage is used as the exit status, so
  # the failure is reported to the calling shell.
  RDoc::usage(1)
end

# Make sure that the last remaining argument is a URL we can crawl.
root = ARGV.last
begin
  raise ArgumentError, 'no URL given' if root.nil?
  URI(root)
rescue ArgumentError, URI::InvalidURIError
  # RDoc::usage prints the usage text and exits, here with status 1.
  RDoc::usage(1)
end

Anemone.crawl(root) do |anemone|
  anemone.on_every_page do |page|
    # -r/--relative prints only the path component; default is the full URL.
    puts(options.relative ? page.url.path : page.url)
  end
end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
anemone-0.0.1 bin/anemone_url_list.rb
anemone-0.0.2 bin/anemone_url_list.rb