Sha256: 8851a0c24d92547537c08be212c0789558ea4f159e28d6b638d1b2a4d55f4e7a

Contents?: true

Size: 810 Bytes

Versions: 5

Compression:

Stored size: 810 Bytes

Contents

require 'anemone'
require 'optparse'
require 'ostruct'

# Command-line entry point: crawls a site starting at the URL given on the
# command line and saves the pages object handed to `after_crawl` to a file
# using Marshal serialization.

# Help text shown when no usable URL is supplied.
usage = <<-INFO
Usage:
  anemone serialize [options] <url>

Synopsis:
  Crawls a site starting at the given URL and saves the resulting
  PageHash object to a file using Marshal serialization.

Options:
  -o, --output filename      Filename to save PageHash to. Defaults to crawl.{Time.now}
INFO

options = OpenStruct.new
options.output_file = "crawl.#{Time.now.to_i}"

# Parse command-line options *before* looking for the URL. parse! removes the
# options it recognises from ARGV, so both `[options] <url>` (the form shown in
# the usage text) and `<url> [options]` leave the URL as the first argument.
opts = OptionParser.new
opts.on('-o', '--output filename') { |o| options.output_file = o }
opts.parse!(ARGV)

begin
  # make sure that the first remaining argument is a URL we can crawl
  root = URI(ARGV[0])
rescue
  puts usage
  exit(0)
end

Anemone.crawl(root) do |anemone|
  anemone.after_crawl do |pages|
    # File.open (not Kernel#open) so a filename starting with "|" is never
    # treated as a command to run; 'wb' because Marshal output is binary data.
    File.open(options.output_file, 'wb') { |f| Marshal.dump(pages, f) }
  end
end

Version data entries

5 entries across 5 versions & 3 rubygems

Version Path
spk-anemone-0.2.4 lib/anemone/cli/serialize.rb
shingara-anemone-0.2.4 lib/anemone/cli/serialize.rb
anemone-0.2.3 lib/anemone/cli/serialize.rb
anemone-0.2.2 lib/anemone/cli/serialize.rb
anemone-0.2.1 lib/anemone/cli/serialize.rb