Class | DeathMasterFileLoader |
In: |
lib/ssn_validator/models/death_master_file_loader.rb
|
Parent: | Object |
Loads all the update files from dmf.ntis.gov. It starts with the last file loaded, and loads each missing file in sequence up to the current file.
# File lib/ssn_validator/models/death_master_file_loader.rb, line 77
#
# Loads the monthly update files from dmf.ntis.gov in sequence, starting with
# the month after the latest as_of date recorded in DeathMasterFile and ending
# with the current month.
#
# When a block is given it receives status strings: the "Loading file ..."
# message for each month, plus whatever the loader yields while validating
# and loading that file.
#
# Raises ArgumentError when DeathMasterFile has no as_of date yet, because
# there is then no starting point from which to compute the first update.
def self.load_update_files_from_web
  max_as_of = DeathMasterFile.maximum(:as_of)
  unless max_as_of
    raise(ArgumentError, "No death master file has been loaded yet; load an initial file before loading update files.")
  end

  run_file_date = max_as_of.beginning_of_month.next_month
  last_file_date = Date.today.beginning_of_month

  while run_file_date <= last_file_date
    url = "https://dmf.ntis.gov/dmldata/monthly/MA#{run_file_date.strftime("%y%m%d")}"
    puts "Loading file #{url}"
    yield "Loading file #{url}" if block_given?

    # Constructing the loader validates (and, for URLs, downloads) the file.
    dmf = DeathMasterFileLoader.new(url, run_file_date.strftime("%Y-%m-%d")) { |status| yield status if block_given? }
    dmf.load_file { |status| yield status if block_given? }

    run_file_date += 1.month
  end
end
path_or_url is the full path to the file to load on disk, or the URL of an update file. file_as_of is a string in the format YYYY-MM-DD giving the date for which the file data is accurate.
# File lib/ssn_validator/models/death_master_file_loader.rb, line 11
#
# Stores the file location and its as-of date, then runs valid? right away,
# relaying every status that valid? yields to the caller's block (if any).
#
# path_or_url:: kept in @file_path_or_url
# file_as_of::  kept in @file_as_of
def initialize(path_or_url, file_as_of)
  @file_path_or_url, @file_as_of = path_or_url, file_as_of

  valid? do |status|
    yield status if block_given?
  end
end
# File lib/ssn_validator/models/death_master_file_loader.rb, line 47
#
# Downloads @file_path_or_url into a temporary file using HTTP basic
# authentication (SsnValidator::Ntis.user_name / password) and returns the
# path of that temporary file.
#
# Progress messages are printed with puts and also yielded when a block is
# given. When the server sends no Content-Length the message reports only the
# number of bytes received so far, since a percentage cannot be computed.
#
# Raises ArgumentError for a 404 or 401 response, and the Net::HTTP error
# produced by Net::HTTPResponse#value for any other non-2xx response.
def get_file_from_web
  uri = URI.parse(@file_path_or_url)
  request = Net::HTTP::Get.new(uri.request_uri)
  request.basic_auth(SsnValidator::Ntis.user_name, SsnValidator::Ntis.password)

  http = Net::HTTP.new(uri.host, uri.port)
  # Decide on TLS from the scheme, not the port, so https on a non-standard
  # port is still encrypted and http on port 443 is not forced to TLS.
  http.use_ssl = (uri.scheme == 'https')
  # Credentials are sent with the request, so the server certificate must be
  # verified.
  http.verify_mode = OpenSSL::SSL::VERIFY_PEER

  csv_file = Tempfile.new(@file_path_or_url.split('/').last) # create temp file for the raw file.
  begin
    http.request(request) do |res|
      raise(ArgumentError, "Invalid URL: #{@file_path_or_url}") if res.kind_of?(Net::HTTPNotFound)
      raise(ArgumentError, "Authorization Required: Invalid username or password. Set the variables SsnValidator::Ntis.user_name and SsnValidator::Ntis.password in your environment.rb file.") if res.kind_of?(Net::HTTPUnauthorized)
      # Any other non-2xx response carries an error page, not file data.
      res.value

      size = 0
      total = res['Content-Length'].to_i # 0 when the header is absent
      res.read_body do |chunk|
        size += chunk.bytesize
        csv_file.write chunk
        progress = if total > 0
                     "%d%% done (%d of %d)" % [(size * 100) / total, size, total]
                   else
                     "%d bytes downloaded" % size
                   end
        puts progress
        yield(progress) if block_given?
      end
    end
    # Flush buffered data to disk; the caller reopens the file by path.
    csv_file.close
  rescue StandardError
    csv_file.close! # do not leave a partial download behind
    raise
  end

  # Keep a reference so the Tempfile is not garbage collected (which would
  # unlink the file) while its path is still in use.
  @web_tempfile = csv_file
  csv_file.path
end
# File lib/ssn_validator/models/death_master_file_loader.rb, line 32
#
# Loads the file into the database. When the DeathMasterFile connection is a
# MysqlAdapter or a JdbcAdapter the file is first converted to csv and then
# handed to bulk_mysql_update; any other connection goes through
# active_record_file_load. Status strings are relayed to the caller's block
# when one is given.
def load_file
  relay = proc { |status| yield status if block_given? }

  connection = DeathMasterFile.connection
  bulk_import = connection.kind_of?(ActiveRecord::ConnectionAdapters::MysqlAdapter) ||
                connection.kind_of?(ActiveRecord::ConnectionAdapters::JdbcAdapter)
  return active_record_file_load(&relay) unless bulk_import

  notice = "Converting file to csv format for Mysql import. This could take several minutes."
  puts notice
  yield notice if block_given?

  converted = convert_file_to_csv(&relay)
  bulk_mysql_update(converted, &relay)
end
# File lib/ssn_validator/models/death_master_file_loader.rb, line 17
#
# Checks the loader's inputs and opens the file to be loaded, storing the
# open File in @download_file (which is also the return value).
#
# If @file_path_or_url names an existing file it is opened directly; if it
# parses as an HTTP(S) URI the file is first fetched with get_file_from_web,
# whose status messages are relayed to the caller's block when one is given.
#
# Raises ArgumentError when the path/url or the as_of date is missing, or when
# DeathMasterFile already holds an as_of date on or after this file's date.
# Raises Errno::ENOENT when the location is neither an existing file nor an
# HTTP(S) URL.
def valid?
  raise(ArgumentError, "path_or_url not specified") unless @file_path_or_url
  raise(ArgumentError, "as_of not specified") unless @file_as_of

  max_as_of = DeathMasterFile.maximum(:as_of)
  if max_as_of && (max_as_of >= @file_as_of.to_date)
    raise(ArgumentError, "A more recent file has already been processed. DB as_of date #{max_as_of}")
  end

  # File.exist? rather than File.exists?: the latter was removed in Ruby 3.2.
  if File.exist?(@file_path_or_url)
    @download_file = File.open(@file_path_or_url)
  elsif URI.parse(@file_path_or_url).kind_of?(URI::HTTP)
    @download_file = File.open(get_file_from_web { |status| yield status if block_given? })
  else
    raise(Errno::ENOENT, @file_path_or_url)
  end
end