lib/sitediff/fetch.rb in sitediff-0.0.5 vs lib/sitediff/fetch.rb in sitediff-0.0.6
- old
+ new
@@ -6,16 +6,19 @@
class SiteDiff
class Fetch
# Build a fetcher; this only stores its arguments in instance variables.
#
# Cache is a cache object, see sitediff/cache
# Paths is a list of sub-paths
# Interval (new signature only) is a delay in milliseconds that is slept
# when a fetched response is handled; 0 disables the delay.
# Concurrency defaults to 3 and is stored as @concurrency; presumably the
# maximum number of simultaneous requests -- TODO confirm where it is used.
# Curl_opts falls back to UriWrapper::DEFAULT_CURL_OPTS when nil or false.
# Debug (new signature only) defaults to true and is forwarded as the third
# argument to UriWrapper.new.
# Tags is a hash of tag names => base URLs.
- def initialize(cache, paths, concurrency = 3, curl_opts = nil, **tags)
+ def initialize(cache, paths, interval, concurrency = 3, curl_opts = nil,
+ debug = true, **tags)
@cache = cache
+ @interval = interval
@paths = paths
@tags = tags
@curl_opts = curl_opts || UriWrapper::DEFAULT_CURL_OPTS
@concurrency = concurrency
+ @debug = debug
end
# Fetch all the paths, once per tag.
# When a path has been fetched for every tag, block will be called with the
# path, and a hash of tag => UriWrapper::ReadResult objects.
@@ -39,11 +42,16 @@
elsif !base
# We only have the cache, but this item isn't cached!
results[tag] = UriWrapper::ReadResult.error('Not cached')
process_results(path, results)
else
- uri = UriWrapper.new(base + path, @curl_opts)
+ uri = UriWrapper.new(base + path, @curl_opts, @debug)
uri.queue(@hydra) do |resl|
+ # Insert delay to limit fetching rate
+ if @interval != 0
+ SiteDiff.log("Waiting #{@interval} milliseconds.", :info)
+ sleep(@interval / 1000.0)
+ end
@cache.set(tag, path, resl)
results[tag] = resl
process_results(path, results)
end
end