Sha256: 177860008c938bb7111d3b1eb3046311a5c7eea6060335dad7198e5ab98d734c
Contents?: true
Size: 1.76 KB
Versions: 6
Compression:
Stored size: 1.76 KB
Contents
# FileUtils is used by Puller#pull_resource; required explicitly so this file
# does not depend on another file having loaded it first.
require 'fileutils'
require File.dirname(__FILE__) + '/shared'

module DataCatalog
  module ImporterFramework

    # Fetches records from configured puller classes and writes each record
    # to its own numbered YAML file, one folder per resource.
    #
    # Relies on names defined outside this file: +Shared+ (presumably the
    # provider of +folder+ -- confirm in shared.rb), +Error+ and +Utility+.
    class Puller
      include Shared

      # Option keys (given as symbols) that must be present and truthy.
      REQUIRED = %w(cache_folder pullers).freeze

      # options - Hash of settings. Must contain a truthy value for each key
      #           in REQUIRED (:cache_folder and :pullers). :pullers is
      #           indexed by resource name (e.g. :source) and yields the
      #           class to instantiate for that resource.
      #
      # Raises Error when a required option is missing, nil or false.
      def initialize(options)
        REQUIRED.each do |r|
          raise Error, "option :#{r} is required" unless options[r.intern]
        end
        @options = options
        # Per-resource sequence number used to name output files. Numbering
        # starts at 1; the default of 1 lets any resource that has a puller
        # configured be written, not only the two pre-seeded below.
        @counter = Hash.new(1).update(
          :source       => 1,
          :organization => 1
        )
      end

      # Pulls sources first, then organizations, wrapping each step in
      # Utility.report_timing with a descriptive label.
      def run
        Utility.report_timing "pull source" do
          pull_resource(:source)
        end
        Utility.report_timing "pull organization" do
          pull_resource(:organization)
        end
      end

      protected

      # Note on HTTP Throttling
      #
      # It might make sense to throttle the HTTP calls that pullers make
      # while pull_resource is running (for sources and for organizations).
      #
      # However, doing a simple sleep(TIME_DELAY) is too blunt.
      # It makes sense when an HTTP call is made; however, it does
      # not make sense when the importer uses a local cache.
      #
      # An alternative is to wrap HTTP calls in this Importer library.
      # It could add a little bit of delay to HTTP calls that are made
      # too rapidly.

      # Instantiates the puller class configured for +resource+, ensures the
      # resource's folder exists, and writes every record the puller yields.
      #
      # resource - Symbol key into options[:pullers] (e.g. :source).
      #
      # Raises Error when no puller class is configured for +resource+.
      def pull_resource(resource)
        importer_class = @options[:pullers][resource]
        unless importer_class
          raise Error, "options[:pullers][:#{resource}] is required"
        end
        importer = importer_class.new
        FileUtils.mkdir_p(folder(resource))
        # A nil/false return from #fetch signals that the puller is exhausted.
        while (data = importer.fetch)
          write_data(resource, data)
        end
      end

      # Writes +data+ as YAML to the next numbered file in the resource's
      # folder (zero-padded to 8 digits, e.g. "00000001.yml"), then advances
      # that resource's counter.
      def write_data(resource, data)
        file = folder(resource) + ("/%08i.yml" % @counter[resource])
        Utility.write_yaml(file, data)
        @counter[resource] += 1
      end
    end

  end
end
Version data entries
6 entries across 6 versions & 1 rubygems