lib/anemone/core.rb in anemone-0.5.0 vs lib/anemone/core.rb in anemone-0.6.0
- old
+ new
@@ -7,11 +7,11 @@
require 'anemone/storage'
require 'anemone/storage/base'
module Anemone
- VERSION = '0.5.0';
+ VERSION = '0.6.0';
#
# Convenience method to start a crawl
#
def Anemone.crawl(urls, options = {}, &block)
@@ -47,11 +47,17 @@
# Hash of cookie name => value to send with HTTP requests
:cookies => nil,
# accept cookies from the server and send them back?
:accept_cookies => false,
# skip any link with a query string? e.g. http://foo.com/?u=user
- :skip_query_strings => false
+ :skip_query_strings => false,
+ # proxy server hostname
+ :proxy_host => nil,
+ # proxy server port number
+ :proxy_port => false,
+ # HTTP read timeout in seconds
+ :read_timeout => nil
}
# Create setter methods for all options to be called from the crawl block
DEFAULT_OPTS.keys.each do |key|
define_method "#{key}=" do |value|
@@ -258,9 +264,11 @@
# is granted access in it. Always returns +true+ when we are
# not obeying robots.txt.
#
def allowed(link)
@opts[:obey_robots_txt] ? @robots.allowed?(link) : true
+ rescue
+ false
end
#
# Returns +true+ if we are over the page depth limit.
# This only works when coming from a page and with the +depth_limit+ option set.