Sha256: b7b465b86765527f36a74e423ed2d17a1bbc9398c3a63a1245ecd6daf07547a2

Contents?: true

Size: 1.06 KB

Versions: 12

Compression:

Stored size: 1.06 KB

Contents

#! /usr/bin/env ruby
# == Synopsis
#   Crawls a site starting at the given URL, and outputs the URL of each page
#   in the domain as it is encountered.
#
# == Usage
#   anemone_url_list.rb [options] url
#
# == Options
#   -r, --relative          Output relative URLs (rather than absolute)
#
# == Author
#   Chris Kite

$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'anemone'
require 'optparse'
require 'ostruct'

# Prints the command-line help text (usage line plus the option summary)
# to standard output. Returns nil.
def usage
  help_lines = [
    'Usage: anemone_url_list.rb [options] url',
    '    ',
    'Options:',
    '  -r, --relative      Output relative URLs (rather than absolute)'
  ]
  # puts emits each array element on its own line, newline-terminated.
  puts help_lines
end

# Script body: parse the command line, validate the start URL, then crawl it
# and print the URL (or just the path, with -r) of every page encountered.

options = OpenStruct.new
options.relative = false

# Parse command-line options first. parse! removes recognised flags from ARGV,
# so afterwards only positional arguments remain. Validating ARGV.last before
# this step would mistake a trailing flag (e.g. a lone "-r") for the URL.
opts = OptionParser.new
opts.on('-r', '--relative') { options.relative = true }
begin
  opts.parse!(ARGV)
rescue OptionParser::ParseError => e
  # Unknown or malformed flag: report it and show the help text rather than
  # dying with a stack trace.
  warn e.message
  usage
  exit 1
end

# Make sure that the last remaining argument is a URL we can crawl.
# URI() raises ArgumentError when given nil (no URL supplied) and
# URI::InvalidURIError when the string cannot be parsed.
root_url = ARGV.last
begin
  URI(root_url)
rescue ArgumentError, URI::InvalidURIError
  usage
  exit 1 # non-zero status so calling scripts can detect the usage error
end

Anemone.crawl(root_url, :discard_page_bodies => true) do |anemone|
  anemone.on_every_page do |page|
    # -r/--relative prints only the path component of each page's URL.
    puts(options.relative ? page.url.path : page.url)
  end
end

Version data entries

12 entries across 12 versions & 3 rubygems

Version Path
chriskite-anemone-0.0.4 bin/anemone_url_list.rb
chriskite-anemone-0.0.5 bin/anemone_url_list.rb
chriskite-anemone-0.0.6 bin/anemone_url_list.rb
chriskite-anemone-0.1.0 bin/anemone_url_list.rb
parolkar-anemone-0.1.2 bin/anemone_url_list.rb
anemone-0.2.0 bin/anemone_url_list.rb
anemone-0.1.2 bin/anemone_url_list.rb
anemone-0.0.5 bin/anemone_url_list.rb
anemone-0.0.6 bin/anemone_url_list.rb
anemone-0.1.0 bin/anemone_url_list.rb
anemone-0.1.1 bin/anemone_url_list.rb
anemone-0.0.4 bin/anemone_url_list.rb