Sha256: 7fa2da30e3d0038647d915e5ae27ab5d9ff536dfd33fb82a60cb988c3369a954
Contents?: true
Size: 1019 Bytes
Versions: 2
Compression:
Stored size: 1019 Bytes
Contents
#! /usr/bin/env ruby
# == Synopsis
#   Crawls a site starting at the given URL, and saves the resulting
#   PageHash object to a file using Marshal serialization.
#
# == Usage
#   anemone_serialize.rb [options] url
#
# == Options
#   -o, --output filename   Filename to save PageHash to.
#                           Defaults to crawl.<Unix timestamp> (Time.now.to_i)
#
# == Author
#   Chris Kite

# Prefer the library that ships next to this script over an installed gem.
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")

require 'anemone'
require 'optparse'
require 'ostruct'
require 'uri'

options = OpenStruct.new
options.output_file = "crawl.#{Time.now.to_i}"

# Usage text is generated by OptionParser; rdoc/usage (RDoc::usage) is not
# available on Ruby 1.9 and later.
opts = OptionParser.new
opts.banner = "Usage: anemone_serialize.rb [options] url"
opts.on('-o', '--output filename',
        'Filename to save PageHash to. Defaults to crawl.<Unix timestamp>') do |filename|
  options.output_file = filename
end

# Prints an optional error message followed by the usage text to stderr and
# terminates with a non-zero status so callers can detect the failure.
usage_and_exit = lambda do |message|
  warn message if message
  warn opts.help
  exit 1
end

# Parse the options BEFORE looking at ARGV[0]: the documented call form is
# "[options] url", so the URL is only the first argument once parse! has
# removed the option flags and their values from ARGV.
begin
  opts.parse!(ARGV)
rescue OptionParser::ParseError => e
  usage_and_exit.call(e.message)
end

# make sure that the first remaining argument is a URL we can parse
root = ARGV[0]
usage_and_exit.call('Missing url argument.') if root.nil?
begin
  URI(root)
rescue URI::InvalidURIError
  usage_and_exit.call("Invalid url: #{root}")
end

Anemone.crawl(root) do |anemone|
  anemone.after_crawl do |pages|
    # File.open (not Kernel#open) so a filename starting with "|" is never
    # treated as a command to run; binary mode because Marshal emits bytes.
    File.open(options.output_file, 'wb') { |f| Marshal.dump(pages, f) }
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
anemone-0.0.1 | bin/anemone_serialize.rb |
anemone-0.0.2 | bin/anemone_serialize.rb |