Sha256: f5d373b91ae83a0ae1943f7665b46fbc18022e6b34814f2b636d4303bfa66bac
Contents?: true
Size: 1.41 KB
Versions: 1
Compression:
Stored size: 1.41 KB
Contents
require 'ostruct'
require 'anemone/core'

#
# Top-level namespace: holds the version, the default crawl options, the
# shared options object, and the Anemone.crawl convenience entry point.
#
module Anemone
  # Version number
  VERSION = '0.2.1'

  # default options
  DEFAULTS = {
    # run 4 Tentacle threads to fetch pages
    :threads => 4,
    # disable verbose output
    :verbose => false,
    # don't throw away the page response body after scanning it for links
    :discard_page_bodies => false,
    # identify self as Anemone/VERSION
    :user_agent => "Anemone/#{VERSION}",
    # no delay between requests
    :delay => 0,
    # don't obey the robots exclusion protocol
    :obey_robots_txt => false,
    # by default, don't limit the depth of the crawl
    :depth_limit => false,
    # number of times HTTP redirects will be followed
    :redirect_limit => 5
  }

  #
  # Shared options object: an OpenStruct built from DEFAULTS on first access
  # and memoized, so values assigned to it remain set for later calls.
  #
  def self.options
    @options ||= OpenStruct.new(DEFAULTS)
  end

  #
  # Convenience method to start a crawl using Core
  #
  # Every key/value pair in +options+ is assigned onto Anemone.options before
  # the crawl starts. The optional block is forwarded to Core.crawl, whose
  # result is returned.
  #
  # If :obey_robots_txt is enabled and the 'robots' library cannot be loaded,
  # installation instructions are printed with +warn+ and the process exits
  # with a failure status.
  #
  def Anemone.crawl(urls, options = {}, &block)
    options.each { |key, value| Anemone.options.send("#{key}=", value) }

    if Anemone.options.obey_robots_txt
      begin
        # loaded lazily: only needed when robots.txt support is requested
        require 'robots'
      rescue LoadError
        warn "To support the robot exclusion protocol, install the robots gem:\n" \
          "sudo gem sources -a http://gems.github.com\n" \
          "sudo gem install fizx-robots"
        # a missing dependency is an error, so report a non-zero exit status
        # (a bare `exit` would signal success to the calling shell)
        exit 1
      end
    end

    #use a single thread if a delay was requested
    Anemone.options.threads = 1 if Anemone.options.delay > 0

    Core.crawl(urls, &block)
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
anemone-0.2.1 | lib/anemone/anemone.rb |