fetch.rb in sitediff-0.0.6

- old
+ new

@@ -6,16 +6,19 @@
 class SiteDiff
   class Fetch
     # Cache is a cache object, see sitediff/cache
     # Paths is a list of sub-paths
     # Tags is a hash of tag names => base URLs.
-    def initialize(cache, paths, concurrency = 3, curl_opts = nil, **tags)
+    def initialize(cache, paths, interval, concurrency = 3, curl_opts = nil,
+                   debug = true, **tags)
       # Stores the collaborators and settings used by the fetch run.
       #
       # interval    - delay in milliseconds; the fetch callback sleeps for this
       #               long before caching each result. 0 skips the delay.
       # concurrency - defaults to 3; presumably the maximum number of parallel
       #               requests (TODO confirm where @concurrency is consumed).
       # curl_opts   - falls back to UriWrapper::DEFAULT_CURL_OPTS when nil.
       # debug       - forwarded to UriWrapper.new together with curl_opts.
       #
       # NOTE(review): interval is a required positional argument inserted
       # before concurrency, so callers that passed concurrency positionally
       # under the old signature need updating.
       @cache = cache
+      @interval = interval
       @paths = paths
       @tags = tags
       @curl_opts = curl_opts || UriWrapper::DEFAULT_CURL_OPTS
       @concurrency = concurrency
+      @debug = debug
     end
 
     # Fetch all the paths, once per tag.
     # When a path has been fetched for every tag, block will be called with the
     # path, and a hash of tag => UriWrapper::ReadResult objects.
@@ -39,11 +42,16 @@
         elsif !base
           # We only have the cache, but this item isn't cached!
           results[tag] = UriWrapper::ReadResult.error('Not cached')
           process_results(path, results)
         else
-          uri = UriWrapper.new(base + path, @curl_opts)
+          uri = UriWrapper.new(base + path, @curl_opts, @debug)
           uri.queue(@hydra) do |resl|
+            # Insert delay to limit fetching rate
+            if @interval != 0
+              SiteDiff.log("Waiting #{@interval} milliseconds.", :info)
+              sleep(@interval / 1000.0)
+            end
             @cache.set(tag, path, resl)
             results[tag] = resl
             process_results(path, results)
           end
         end