Sha256: 0630fa396312202dc122e97f6ba31aa7f35bc7e328afdc8d15d1e56d634ddb9d

Contents?: true

Size: 892 Bytes

Versions: 14

Compression:

Stored size: 892 Bytes

Contents

#! /usr/bin/env ruby
# == Synopsis
#   Crawls a site starting at the given URL, and outputs a count of
#   the number of Pages at each depth in the site.
#
# == Usage
#   anemone_pagedepth.rb url
#
# == Author
#   Chris Kite

$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'anemone'

# Prints the one-line command-line usage summary to standard output.
def usage
  summary = "Usage: anemone_pagedepth.rb url\n"
  $stdout.puts(summary)
end

# Validate the first command-line argument before starting the crawl.
# It must be present and must parse as a URL that names a host; a bare word
# such as "foo" parses as a relative URI with no host and is rejected here
# rather than failing later. On failure, print the usage text and exit with
# a non-zero status so calling scripts can detect the error.
start_url = ARGV[0]
crawlable = begin
  !start_url.nil? && !URI(start_url).host.to_s.empty?
rescue ArgumentError, URI::InvalidURIError
  # URI() rejects malformed input; treat that the same as a missing argument.
  false
end

unless crawlable
  usage
  exit 1
end

# Crawl starting at the URL given on the command line and, once the crawl
# has finished, print how many pages were recorded at each depth.
root = ARGV[0]
Anemone.crawl(root) do |anemone|
  # Ask the crawler to skip links matching these path patterns.
  anemone.skip_links_like %r{^/c/$}, %r{^/stores/$}

  anemone.after_crawl do |pages|
    # NOTE(review): shortest_paths! presumably recomputes each page's depth
    # relative to root, and uniq drops duplicate entries -- confirm against
    # the Anemone page-hash implementation.
    crawled = pages.shortest_paths!(root).uniq

    # Tally pages by depth; the default of 0 replaces a per-key initialisation.
    tally = Hash.new(0)
    crawled.values.each do |page|
      tally[page.depth] += 1
    end

    # Report in ascending depth order.
    tally.sort.each do |depth, count|
      puts "Depth: #{depth} Count: #{count}"
    end
  end
end

Version data entries

14 entries across 14 versions & 4 rubygems

Version Path
chriskite-anemone-0.0.4 bin/anemone_pagedepth.rb
chriskite-anemone-0.0.5 bin/anemone_pagedepth.rb
chriskite-anemone-0.0.6 bin/anemone_pagedepth.rb
chriskite-anemone-0.1.0 bin/anemone_pagedepth.rb
jeremyf-anemone-0.1.3 bin/anemone_pagedepth.rb
parolkar-anemone-0.1.2 bin/anemone_pagedepth.rb
anemone-0.2.0 bin/anemone_pagedepth.rb
anemone-0.1.2 bin/anemone_pagedepth.rb
anemone-0.0.5 bin/anemone_pagedepth.rb
anemone-0.0.6 bin/anemone_pagedepth.rb
anemone-0.1.0 bin/anemone_pagedepth.rb
anemone-0.1.1 bin/anemone_pagedepth.rb
anemone-0.0.3 bin/anemone_pagedepth.rb
anemone-0.0.4 bin/anemone_pagedepth.rb