Sha256: 667dcd2adb967db5d66ac05c556c6f9a9eafee9719bfa669b6f4116f760789f4

Contents?: true

Size: 813 Bytes

Versions: 24

Compression:

Stored size: 813 Bytes

Contents

require 'anemone'
require 'optparse'
require 'ostruct'

# Run-time settings for this command. Absolute URLs are printed unless the
# -r/--relative switch flips this flag later on.
options = OpenStruct.new(:relative => false)

# Validate the final command-line argument as the crawl root before anything
# else runs. URI() alone is too permissive for this: it parses a stray switch
# such as a trailing "-r", or a scheme-less "example.com", as a relative
# reference without raising, so those inputs used to skip the usage message.
root = begin
  URI(ARGV.last)
rescue StandardError
  # Covers a missing argument (URI(nil) raises) and malformed strings alike;
  # both simply mean there is no usable URL.
  nil
end

# Only an absolute http(s) URL with a host is accepted (URI::HTTPS is a
# subclass of URI::HTTP, so one class check covers both schemes).
# NOTE(review): assumes Anemone can only crawl http/https roots -- confirm.
crawlable = root.is_a?(URI::HTTP) && !root.host.to_s.empty?

unless crawlable
  puts <<-INFO
Usage:
  anemone url-list [options] <url>
    
Synopsis:
  Crawls a site starting at the given URL, and outputs the URL of each page
  in the domain as they are encountered.

Options:
  -r, --relative      Output relative URLs (rather than absolute)
INFO
  # Exit status 0 is kept as-is so existing callers see no change.
  exit(0)
end

# Command-line switches: -r / --relative turns on relative-path output.
opts = OptionParser.new do |parser|
  parser.on('-r', '--relative') do
    options.relative = true
  end
end

# parse! works on ARGV in place, removing the switches it recognizes.
opts.parse!(ARGV)

# Start the crawl at the validated root and print one line per page.
# NOTE(review): :discard_page_bodies presumably lets the crawler drop page
# content once processed, since only URLs are needed here -- confirm.
Anemone.crawl(root, :discard_page_bodies => true) do |crawler|
  crawler.on_every_page do |page|
    # Relative mode prints only the path component of the page URL;
    # otherwise the page URL itself is printed.
    puts(options.relative ? page.url.path : page.url)
  end
end

Version data entries

24 entries across 24 versions & 5 rubygems

Version Path
sutch-anemone-0.7.2.2 lib/anemone/cli/url_list.rb
sutch-anemone-0.7.2.1 lib/anemone/cli/url_list.rb
sutch-anemone-0.7.2 lib/anemone/cli/url_list.rb
rodneyc-anemone-0.7.1.3 lib/anemone/cli/url_list.rb
rodneyc-anemone-0.7.1.2 lib/anemone/cli/url_list.rb
rodneyc-anemone-0.7.1.1 lib/anemone/cli/url_list.rb
anemone-0.7.2 lib/anemone/cli/url_list.rb
anemone-0.7.1 lib/anemone/cli/url_list.rb
anemone-0.7.0 lib/anemone/cli/url_list.rb
anemone-0.6.1 lib/anemone/cli/url_list.rb
anemone-0.6.0 lib/anemone/cli/url_list.rb
anemone-0.5.0 lib/anemone/cli/url_list.rb
spk-anemone-0.4.0 lib/anemone/cli/url_list.rb
anemone-0.4.0 lib/anemone/cli/url_list.rb
anemone-0.3.2 lib/anemone/cli/url_list.rb
spk-anemone-0.3.1 lib/anemone/cli/url_list.rb
anemone-0.3.1 lib/anemone/cli/url_list.rb
spk-anemone-0.3.0 lib/anemone/cli/url_list.rb
anemone-0.3.0 lib/anemone/cli/url_list.rb
spk-anemone-0.2.4 lib/anemone/cli/url_list.rb