Sha256: 8851a0c24d92547537c08be212c0789558ea4f159e28d6b638d1b2a4d55f4e7a
Contents?: true
Size: 810 Bytes
Versions: 5
Compression:
Stored size: 810 Bytes
Contents
# Anemone "serialize" command.
#
# Crawls a site starting at the URL given on the command line and, once the
# crawl has finished, writes the collected pages to a file using Marshal.
#
# Usage: anemone serialize [options] <url>
#   -o, --output filename   file to write; defaults to "crawl.<unix timestamp>"
require 'anemone'
require 'optparse'
require 'ostruct'
require 'uri'

options = OpenStruct.new
options.output_file = "crawl.#{Time.now.to_i}"

# Parse (and strip) the options first so the URL may appear either before or
# after them, as the usage text advertises. parse! removes recognised options
# from ARGV and leaves only the positional arguments behind.
opts = OptionParser.new
opts.on('-o', '--output filename') { |o| options.output_file = o }
opts.parse!(ARGV)

begin
  # the first remaining argument must be a URL we can crawl
  root = URI(ARGV[0])
rescue
  puts <<-INFO
Usage:
  anemone serialize [options] <url>

Synopsis:
  Crawls a site starting at the given URL and saves the resulting
  PageHash object to a file using Marshal serialization.

Options:
  -o, --output filename           Filename to save PageHash to. Defaults to crawl.<unix timestamp>
INFO
  exit(0)
end

Anemone.crawl(root) do |anemone|
  anemone.after_crawl do |pages|
    # File.open (not Kernel#open) so a filename starting with "|" is never
    # treated as a command; 'wb' because Marshal output is binary data.
    File.open(options.output_file, 'wb') { |f| Marshal.dump(pages, f) }
  end
end
Version data entries
5 entries across 5 versions & 3 rubygems