Sha256: e6707b2dab56510d5baf6bc4ffee8d27be002bd2c5fef256ccf27c6eac426778

Contents?: true

Size: 699 Bytes

Versions: 17

Compression:

Stored size: 699 Bytes

Contents

require 'anemone'

# Resolve the crawl root from the first command-line argument. If URI()
# raises (for instance when no argument was supplied), show the usage text
# and terminate the process instead of crawling.
root =
  begin
    URI(ARGV.first)
  rescue
    # Bare rescue: any StandardError raised while building the URI ends up here.
    puts <<-INFO
Usage:
  anemone pagedepth <url>

Synopsis:
  Crawls a site starting at the given URL and outputs a count of
  the number of pages at each depth of the crawl.
INFO
    exit(0)
  end

# Crawl starting at +root+ and, once the crawl is done, print how many pages
# were found at each depth (one "Depth: N Count: M" line per depth, sorted).
Anemone.crawl(root) do |anemone|
  # Link patterns handed to skip_links_like; presumably matching links are
  # not followed -- confirm against the Anemone docs.
  anemone.skip_links_like %r{^/c/$}, %r{^/stores/$}

  anemone.after_crawl do |store|
    # NOTE(review): relies on shortest_paths! and uniq! both returning the
    # page store itself (not nil) so the calls can be chained -- verify
    # against Anemone::PageStore.
    reachable = store.shortest_paths!(root).uniq!

    # Count pages per depth; the hash defaults missing depths to zero.
    per_depth = Hash.new(0)
    reachable.values.each { |page| per_depth[page.depth] += 1 }

    per_depth.sort.each do |level, total|
      puts "Depth: #{level} Count: #{total}"
    end
  end
end

Version data entries

17 entries across 17 versions & 4 rubygems

Version Path
sutch-anemone-0.7.2.2 lib/anemone/cli/pagedepth.rb
sutch-anemone-0.7.2.1 lib/anemone/cli/pagedepth.rb
sutch-anemone-0.7.2 lib/anemone/cli/pagedepth.rb
rodneyc-anemone-0.7.1.3 lib/anemone/cli/pagedepth.rb
rodneyc-anemone-0.7.1.2 lib/anemone/cli/pagedepth.rb
rodneyc-anemone-0.7.1.1 lib/anemone/cli/pagedepth.rb
anemone-0.7.2 lib/anemone/cli/pagedepth.rb
anemone-0.7.1 lib/anemone/cli/pagedepth.rb
anemone-0.7.0 lib/anemone/cli/pagedepth.rb
anemone-0.6.1 lib/anemone/cli/pagedepth.rb
anemone-0.6.0 lib/anemone/cli/pagedepth.rb
anemone-0.5.0 lib/anemone/cli/pagedepth.rb
spk-anemone-0.4.0 lib/anemone/cli/pagedepth.rb
anemone-0.4.0 lib/anemone/cli/pagedepth.rb
anemone-0.3.2 lib/anemone/cli/pagedepth.rb
spk-anemone-0.3.1 lib/anemone/cli/pagedepth.rb
anemone-0.3.1 lib/anemone/cli/pagedepth.rb