lib/anemone/core.rb in anemone-0.5.0 vs lib/anemone/core.rb in anemone-0.6.0

- old
+ new

@@ -7,11 +7,11 @@
 require 'anemone/storage'
 require 'anemone/storage/base'

 module Anemone

-  VERSION = '0.5.0';
+  VERSION = '0.6.0';

   #
   # Convenience method to start a crawl
   #
   def Anemone.crawl(urls, options = {}, &block)
@@ -47,11 +47,17 @@
       # Hash of cookie name => value to send with HTTP requests
       :cookies => nil,
       # accept cookies from the server and send them back?
       :accept_cookies => false,
       # skip any link with a query string? e.g. http://foo.com/?u=user
-      :skip_query_strings => false
+      :skip_query_strings => false,
+      # proxy server hostname
+      :proxy_host => nil,
+      # proxy server port number
+      :proxy_port => false,
+      # HTTP read timeout in seconds
+      :read_timeout => nil
     }

     # Create setter methods for all options to be called from the crawl block
     DEFAULT_OPTS.keys.each do |key|
       define_method "#{key}=" do |value|
@@ -258,9 +264,11 @@
     # is granted access in it. Always returns +true+ when we are
     # not obeying robots.txt.
     #
     def allowed(link)
       @opts[:obey_robots_txt] ? @robots.allowed?(link) : true
+    rescue
+      false
     end
     #
     # Returns +true+ if we are over the page depth limit.
     # This only works when coming from a page and with the +depth_limit+ option set.