class DeathMasterFileLoader

Public Class Methods

load_update_files_from_web() { |status| ... } click to toggle source

Loads all the update files from dmf.ntis.gov. It starts with the last file loaded, and loads each missing file in sequence up to the current file.

# File lib/ssn_validator/models/death_master_file_loader.rb, line 80
# Loads every monthly update file from dmf.ntis.gov that has not been loaded
# yet, oldest first.
#
# Starts with the month after the most recent +as_of+ value stored in
# DeathMasterFile and continues through the current month. Each progress
# message is printed to stdout and also yielded to the optional block.
#
# Raises ArgumentError when DeathMasterFile holds no +as_of+ value, because
# there is then no starting point from which to apply updates.
def self.load_update_files_from_web
  max_as_of = DeathMasterFile.maximum(:as_of)
  # An empty table yields nil; fail with a clear message instead of a
  # NoMethodError on nil.
  unless max_as_of
    raise(ArgumentError, "No Death Master File data has been loaded yet.  Load a base file before loading update files.")
  end

  run_file_date = max_as_of.beginning_of_month.next_month
  last_file_date = Date.today.beginning_of_month
  while run_file_date <= last_file_date
    url = "https://dmf.ntis.gov/dmldata/monthly/MA#{run_file_date.strftime("%y%m%d")}"
    message = "Loading file #{url}"
    puts message
    yield message if block_given?

    # The constructor validates (and downloads) the file; load_file imports it.
    dmf = DeathMasterFileLoader.new(url, run_file_date.strftime("%Y-%m-%d")) { |status| yield status if block_given? }
    dmf.load_file { |status| yield status if block_given? }

    run_file_date = run_file_date.next_month
  end
end
new(path_or_url, file_as_of) { |status| ... } click to toggle source

path_or_url is the full path to the file to load on disk, or the URL of an update file. file_as_of is a string in the format YYYY-MM-DD giving the date as of which the file data is accurate.

# File lib/ssn_validator/models/death_master_file_loader.rb, line 11
# Builds a loader for one Death Master File and validates it immediately.
#
# path_or_url:: full path of a file on disk, or the URL of an update file.
# file_as_of::  the date the file's data is accurate as of.
#
# Validation is delegated to valid?; every status message it reports is
# passed on to the optional block.
def initialize(path_or_url, file_as_of)
  @file_as_of = file_as_of
  @file_path_or_url = path_or_url

  valid? do |status|
    yield status if block_given?
  end
end

Public Instance Methods

get_file_from_web() { |"%d%% done (%d of %d)" % [(size * 100) / total, size, total]| ... } click to toggle source
# File lib/ssn_validator/models/death_master_file_loader.rb, line 47
# Downloads the file at @file_path_or_url into a temporary file and returns
# that file's path.
#
# The request uses HTTP basic auth with SsnValidator::Ntis.user_name and
# SsnValidator::Ntis.password, and goes through the proxy named by
# ENV['http_proxy'] when that variable is set. After every chunk received a
# progress message is printed to stdout and yielded to the optional block.
#
# Raises ArgumentError for a 404 or 401 response, and the Net::HTTP error
# matching the status for any other non-2xx response.
def get_file_from_web
  uri = URI.parse(@file_path_or_url)

  request = Net::HTTP::Get.new(uri.request_uri)
  request.basic_auth(SsnValidator::Ntis.user_name, SsnValidator::Ntis.password)

  proxy_addr, proxy_port = ENV['http_proxy'].gsub("http://", "").split(/:/) if ENV['http_proxy']
  # NOTE(review): proxy credentials come from the target URL's userinfo, not
  # from http_proxy -- confirm this is intended.
  proxy_user, proxy_pass = uri.userinfo.split(/:/) if uri.userinfo
  http = Net::HTTP::Proxy(proxy_addr, proxy_port, proxy_user, proxy_pass).new(uri.host, uri.port)

  # Follow the URL scheme; the port check is kept for backward compatibility.
  http.use_ssl = (uri.scheme == 'https' || uri.port == 443)
  # The request carries credentials, so the server certificate must be verified.
  http.verify_mode = OpenSSL::SSL::VERIFY_PEER if http.use_ssl?

  # Kept in an instance variable so the Tempfile is not garbage-collected
  # (which deletes the file) while the returned path is still in use.
  @web_tempfile = Tempfile.new(@file_path_or_url.split('/').last)
  begin
    http.request(request) do |res|
      raise(ArgumentError, "Invalid URL: #{@file_path_or_url}") if res.kind_of?(Net::HTTPNotFound)
      raise(ArgumentError, "Authorization Required: Invalid username or password.  Set the variables SsnValidator::Ntis.user_name and SsnValidator::Ntis.password in your environment.rb file.") if res.kind_of?(Net::HTTPUnauthorized)
      # Any other non-2xx response would otherwise be saved as if it were data.
      res.value

      size = 0
      total = res['Content-Length'].to_i # 0 when the header is absent
      res.read_body do |chunk|
        size += chunk.bytesize
        @web_tempfile.write chunk
        progress = if total > 0
                     "%d%% done (%d of %d)" % [(size * 100) / total, size, total]
                   else
                     # No Content-Length: a percentage cannot be computed.
                     "%d bytes received (total size unknown)" % size
                   end
        puts progress
        yield(progress) if block_given?
      end
    end
  ensure
    # Flush buffered data to disk; close without unlinking keeps the file.
    @web_tempfile.close
  end

  @web_tempfile.path
end
load_file() { |"Converting file to csv format for Mysql import. This could take several minutes."| ... } click to toggle source
# File lib/ssn_validator/models/death_master_file_loader.rb, line 32
# Loads the file into the database.
#
# When the DeathMasterFile connection is a MysqlAdapter or a JdbcAdapter the
# file is first converted with convert_file_to_csv and then imported with
# bulk_mysql_update; for any other connection active_record_file_load is
# used. Status messages from each step are passed to the optional block.
def load_file
  # Single relay handed to every helper; it forwards to the caller's block
  # only when one was given.
  relay = proc { |status| yield status if block_given? }

  connection = DeathMasterFile.connection
  bulk_import = connection.kind_of?(ActiveRecord::ConnectionAdapters::MysqlAdapter) ||
                connection.kind_of?(ActiveRecord::ConnectionAdapters::JdbcAdapter)
  return active_record_file_load(&relay) unless bulk_import

  notice = "Converting file to csv format for Mysql import.  This could take several minutes."
  puts notice
  yield notice if block_given?

  converted = convert_file_to_csv(&relay)
  bulk_mysql_update(converted, &relay)
end
valid?() { |status| ... } click to toggle source
# File lib/ssn_validator/models/death_master_file_loader.rb, line 17
# Checks the loader's inputs and opens the file to be loaded.
#
# On success @download_file is set to an open File: either the local file at
# @file_path_or_url or, for an HTTP(S) URL, the copy fetched by
# get_file_from_web (whose status messages go to the optional block).
#
# Raises ArgumentError when the path/URL or the as-of date is missing, or
# when DeathMasterFile already holds an as_of date on or after this file's.
# Raises Errno::ENOENT when the value is neither an existing file nor an
# HTTP(S) URL.
def valid?
  raise(ArgumentError, "path_or_url not specified") unless @file_path_or_url
  raise(ArgumentError, "as_of not specified") unless @file_as_of
  max_as_of = DeathMasterFile.maximum(:as_of)
  raise(ArgumentError, "A more recent file has already been processed.  DB as_of date #{max_as_of}") if  max_as_of && (max_as_of >= @file_as_of.to_date)

  # File.exist? is available on every Ruby version; File.exists? was removed in 3.2.
  if File.exist?(@file_path_or_url)
    @download_file = File.open(@file_path_or_url)
  else
    # A missing local path may not be parseable as a URI (e.g. it contains
    # spaces); treat that as "not a URL" so the caller gets ENOENT.
    web_url = begin
      URI.parse(@file_path_or_url).kind_of?(URI::HTTP)
    rescue URI::InvalidURIError
      false
    end
    raise(Errno::ENOENT, @file_path_or_url) unless web_url

    @download_file = File.open(get_file_from_web { |status| yield status if block_given? })
  end
end