lib/benchmark/http/spider.rb in benchmark-http-0.12.0 vs lib/benchmark/http/spider.rb in benchmark-http-0.13.0
- old
+ new
@@ -32,12 +32,13 @@
module Benchmark
module HTTP
class Spider
include Async::Await
# Store the spider's crawl options; the new revision adds the `ignore:` keyword.
#
# @param depth the default for `fetch`'s `depth` argument. `fetch` guards its
#   depth check with `&.`, so `nil` (the default) skips that check.
# @param ignore when non-nil, `fetch` returns early for any URL whose `path`
#   satisfies `ignore.match?`. Presumably a Regexp; confirm against callers.
- def initialize(depth: nil)
+ def initialize(depth: nil, ignore: nil)
@depth = depth
+ @ignore = ignore
end
def extract_links(url, response)
base = url
@@ -69,15 +70,17 @@
end
end.compact
end
async def fetch(statistics, client, url, depth = @depth, fetched = Set.new, &block)
- if depth.zero?
+ if depth&.zero?
Async.logger.warn(self) {"Exceeded depth while trying to visit #{url}!"}
return
elsif fetched.include?(url)
return
+ elsif @ignore&.match?(url.path)
+ return
end
fetched << url
request_uri = url.request_uri
@@ -88,10 +91,10 @@
if response.redirection?
location = url + response.headers['location']
if location.host == url.host
Async.logger.debug(self) {"Following redirect to #{location}..."}
- fetch(statistics, client, location, depth-1, fetched, &block).wait
+ fetch(statistics, client, location, depth&.-(1), fetched, &block).wait
return
else
Async.logger.debug(self) {"Ignoring redirect to #{location}."}
return
end