Sha256: afbc824c0088aebbd764e1affe6875ccbe50451f6d95d3fb6f0f6d788f6af34c
Contents?: true
Size: 895 Bytes
Versions: 2
Compression:
Stored size: 895 Bytes
Contents
#! /usr/bin/env ruby
# == Synopsis
#   Crawls a site starting at the given URL, and outputs a count of
#   the number of Pages at each depth in the site.
#
# == Usage
#   anemone_pagedepth.rb url
#
# == Author
#   Chris Kite

# Prefer the library that sits next to this script over an installed gem.
$LOAD_PATH.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'uri'
require 'anemone'

# Plain usage text. The script used to call RDoc::usage, but 'rdoc/usage'
# is not shipped with Ruby 1.9+, so requiring it raised LoadError before
# anything else could run.
usage = <<USAGE
Usage:
  anemone_pagedepth.rb url

Crawls a site starting at the given URL, and outputs a count of
the number of Pages at each depth in the site.
USAGE

# Make sure that the first option is a URL we can crawl. URI() raises for a
# missing (nil) argument and for strings it cannot parse; either way we
# show the usage text and stop, as the original did.
begin
  URI(ARGV[0])
rescue StandardError
  puts usage
  exit
end

root = ARGV[0]

Anemone.crawl(root) do |anemone|
  # Links matching these patterns are not followed.
  anemone.skip_links_like %r{^/c/$}, %r{^/stores/$}

  anemone.after_crawl do |pages|
    # NOTE(review): presumably shortest_paths! recomputes each page's depth
    # relative to root and uniq drops duplicate pages -- confirm against
    # the Anemone page-store implementation.
    pages = pages.shortest_paths!(root).uniq

    # Tally how many pages were found at each depth.
    depth_counts = Hash.new(0)
    pages.values.each { |page| depth_counts[page.depth] += 1 }

    # Report in ascending depth order.
    depth_counts.sort.each do |depth, count|
      puts "Depth: #{depth} Count: #{count}"
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
anemone-0.0.1 | bin/anemone_pagedepth.rb |
anemone-0.0.2 | bin/anemone_pagedepth.rb |