Sha256: 3de4b86e340b8a767b62be91fe253afd4b80b505034e3c2f4eca7388615fd27a
Contents?: true
Size: 708 Bytes
Versions: 7
Compression:
Stored size: 708 Bytes
Contents
require 'anemone' begin # make sure that the first option is a URL we can crawl root = URI(ARGV[0]) rescue puts <<-INFO Usage: anemone pagedepth <url> Synopsis: Crawls a site starting at the given URL and outputs a count of the number of pages at each depth of the crawl. INFO exit(0) end Anemone.crawl(root) do |anemone| anemone.skip_links_like %r{^/c/$}, %r{^/stores/$} anemone.after_crawl do |pages| pages = pages.shortest_paths!(root).uniq depths = pages.values.inject({}) do |depths, page| depths[page.depth] ||= 0 depths[page.depth] += 1 depths end depths.sort.each { |depth, count| puts "Depth: #{depth} Count: #{count}" } end end
Version data entries
7 entries across 7 versions & 3 rubygems