lib/spout/tasks/engine.rake in spout-0.1.0 vs lib/spout/tasks/engine.rake in spout-0.2.0

- old
+ new

@@ -17,77 +17,233 @@ require 'json' desc 'Create Data Dictionary from repository' task :create do - folder = "dd/#{ENV['VERSION'] || Spout::Application.new.version}" + folder = "dd/#{ENV['VERSION'] || '1.0.0'}" FileUtils.mkpath folder - CSV.open("#{folder}/variables.csv", "wb") do |csv| - keys = %w(id display_name description type units domain labels calculation) - csv << ['folder'] + keys - Dir.glob("variables/**/*.json").each do |file| - if json = JSON.parse(File.read(file)) rescue false - variable_folder = file.gsub(/variables\//, '').split('/')[0..-2].join('/') - csv << [variable_folder] + keys.collect{|key| json[key].kind_of?(Array) ? json[key].join(';') : json[key].to_s} - end - end + case ENV['TYPE'] + when 'hybrid' + hybrid_export(folder) + else + standard_export(folder) end - CSV.open("#{folder}/domains.csv", "wb") do |csv| - keys = %w(value display_name description) - csv << ['folder', 'id'] + keys - Dir.glob("domains/**/*.json").each do |file| - if json = JSON.parse(File.read(file)) rescue false - domain_folder = file.gsub(/domains\//, '').split('/')[0..-2].join('/') - domain_name = file.gsub(/domains\//, '').split('/').last.to_s.gsub(/.json/, '') - json.each do |hash| - csv << [domain_folder, domain_name] + keys.collect{|key| hash[key]} - end - end - end - end + puts "Data Dictionary Created in #{folder}" end desc 'Initialize JSON repository from a CSV file: CSV=datadictionary.csv' task :import do - additional_csv_info = "\n\nFor additional information on specifying CSV column headers before import see:\n\n " + "https://github.com/sleepepi/spout#generate-a-new-repository-from-an-existing-csv-file".colorize( :light_cyan ) + "\n\n" - puts ENV['CSV'].inspect if File.exists?(ENV['CSV'].to_s) - CSV.parse( File.open(ENV['CSV'].to_s, 'r:iso-8859-1:utf-8'){|f| f.read}, headers: true ) do |line| - row = line.to_hash - if not row.keys.include?('id') - puts "\nMissing column header `".colorize( :red ) + "id".colorize( :light_cyan ) + "` in data dictionary.".colorize( :red ) + additional_csv_info - exit(1) + ENV['TYPE'] == 'domains' ? import_domains : import_variables + else + puts "\nPlease specify a valid CSV file.".colorize( :red ) + additional_csv_info + end + end +end + +def standard_export(folder) + CSV.open("#{folder}/variables.csv", "wb") do |csv| + keys = %w(id display_name description type units domain labels calculation) + csv << ['folder'] + keys + Dir.glob("variables/**/*.json").each do |file| + if json = JSON.parse(File.read(file)) rescue false + variable_folder = variable_folder_path(file) + csv << [variable_folder] + keys.collect{|key| json[key].kind_of?(Array) ? json[key].join(';') : json[key].to_s} + end + end + end + CSV.open("#{folder}/domains.csv", "wb") do |csv| + keys = %w(value display_name description) + csv << ['folder', 'domain_id'] + keys + Dir.glob("domains/**/*.json").each do |file| + if json = JSON.parse(File.read(file)) rescue false + domain_folder = domain_folder_path(file) + domain_name = extract_domain_name(file) + json.each do |hash| + csv << [domain_folder, domain_name] + keys.collect{|key| hash[key]} end - next if row['id'] == '' - folder = File.join('variables', row.delete('folder').to_s) - FileUtils.mkpath folder - hash = {} - id = row.delete('id') - hash['id'] = id - hash['display_name'] = row.delete('display_name') - hash['description'] = row.delete('description').to_s - hash['type'] = row.delete('type') - domain = row.delete('domain').to_s - hash['domain'] = domain if domain != '' - units = row.delete('units').to_s - hash['units'] = units if units != '' - calculation = row.delete('calculation').to_s - hash['calculation'] = calculation if calculation != '' - labels = row.delete('labels').to_s.split(';') - hash['labels'] = labels if labels.size > 0 - hash['other'] = row unless row.empty? + end + end + end +end - file_name = File.join(folder, id.downcase + '.json') - File.open(file_name, 'w') do |file| - file.write(JSON.pretty_generate(hash)) +def extract_domain_name(file) + file.gsub(/domains\//, '').split('/').last.to_s.gsub(/.json/, '') +end + +def domain_folder_path(file) + file.gsub(/domains\//, '').split('/')[0..-2].join('/') +end + +def variable_folder_path(file) + file.gsub(/variables\//, '').split('/')[0..-2].join('/') +end + +def hybrid_concept_type(json) + if json['hybrid'] and json['hybrid']['type'].to_s != '' + json['hybrid']['type'] + else + hybrid_concept_type_map(json['type']) + end +end + +def hybrid_concept_type_map(variable_type) + hybrid_types = { "choices" => "categorical", + "numeric" => "continuous", + "integer" => "continuous", + "string" => "free text", + "text" => "free text", + "date" => "datetime", + "time" => "datetime", + "file" => "free text" } + hybrid_types[variable_type] || variable_type +end + +def hybrid_property(json, property) + json['hybrid'] ? json['hybrid'][property] : '' +end + +def hybrid_export(folder) + domain_parents = {} + CSV.open("#{folder}/hybrid.csv", "wb") do |csv| + csv << ["Folder", "Short Name", "Description", "Concept Type", "Units", "Terms", "Internal Terms", "Parents", "Children", "Field Values", "Sensitivity", "Display Name", "Commonly Used", "Calculation", "Source Name", "Source File"] + Dir.glob("variables/**/*.json").each do |file| + if json = JSON.parse(File.read(file)) rescue false + if json['domain'].to_s != '' + domain_parents[json['domain'].to_s.downcase] ||= [] + domain_parents[json['domain'].to_s.downcase] << json['id'].to_s end - puts " create".colorize( :green ) + " #{file_name}" + row = [ + variable_folder_path(file), # Folder + json['id'], # Short Name + json['description'], # Description + hybrid_concept_type(json), # Concept Type + json['units'], # Units + (json['labels'] || []).join(';'), # Terms + '', # Internal Terms + '', # Parents + '', # Children + '', # Field Values + hybrid_property(json, 'access level'), # Sensitivity + json['display_name'], # Display Name + hybrid_property(json, 'most commonly used'), # Commonly Used + json['calculation'], # Calculation + hybrid_property(json, 'SOURCE'), # Source Name + hybrid_property(json, 'filename') # Source File + ] + csv << row end - else - puts "\nPlease specify a valid CSV file.".colorize( :red ) + additional_csv_info end + Dir.glob("domains/**/*.json").each do |file| + if json = JSON.parse(File.read(file)) rescue false + json.each do |option| + row = [ + domain_folder_path(file), # Folder + extract_domain_name(file)+'_'+option['value'].to_s, # Short Name + option['description'], # Description + 'boolean', # Concept Type + '', # Units + '', # Terms + option['value'], # Internal Terms + (domain_parents[extract_domain_name(file).downcase] || []).join(';'), # Parents + '', # Children + '', # Field Values + '0', # Sensitivity + option['display_name'], # Display Name + '', # Commonly Used + '', # Calculation + ] + csv << row + end + end + end end +end + +def import_variables + CSV.parse( File.open(ENV['CSV'].to_s, 'r:iso-8859-1:utf-8'){|f| f.read}, headers: true ) do |line| + row = line.to_hash + if not row.keys.include?('id') + puts "\nMissing column header `".colorize( :red ) + "id".colorize( :light_cyan ) + "` in data dictionary.".colorize( :red ) + additional_csv_info + exit(1) + end + next if row['id'] == '' + folder = File.join('variables', row.delete('folder').to_s) + FileUtils.mkpath folder + hash = {} + id = row.delete('id') + hash['id'] = id + hash['display_name'] = row.delete('display_name') + hash['description'] = row.delete('description').to_s + hash['type'] = row.delete('type') + domain = row.delete('domain').to_s + hash['domain'] = domain if domain != '' + units = row.delete('units').to_s + hash['units'] = units if units != '' + calculation = row.delete('calculation').to_s + hash['calculation'] = calculation if calculation != '' + labels = row.delete('labels').to_s.split(';') + hash['labels'] = labels if labels.size > 0 + hash['other'] = row unless row.empty? + + file_name = File.join(folder, id.downcase + '.json') + File.open(file_name, 'w') do |file| + file.write(JSON.pretty_generate(hash) + "\n") + end + puts " create".colorize( :green ) + " #{file_name}" + end +end + +def import_domains + domains = {} + + CSV.parse( File.open(ENV['CSV'].to_s, 'r:iso-8859-1:utf-8'){|f| f.read}, headers: true ) do |line| + row = line.to_hash + if not row.keys.include?('domain_id') + puts "\nMissing column header `".colorize( :red ) + "domain_id".colorize( :light_cyan ) + "` in data dictionary.".colorize( :red ) + additional_csv_info + exit(1) + end + if not row.keys.include?('value') + puts "\nMissing column header `".colorize( :red ) + "value".colorize( :light_cyan ) + "` in data dictionary.".colorize( :red ) + additional_csv_info + exit(1) + end + if not row.keys.include?('display_name') + puts "\nMissing column header `".colorize( :red ) + "display_name".colorize( :light_cyan ) + "` in data dictionary.".colorize( :red ) + additional_csv_info + exit(1) + end + + next if row['domain_id'].to_s == '' or row['value'].to_s == '' or row['display_name'].to_s == '' + folder = File.join('domains', row['folder'].to_s).gsub(/[^a-zA-Z0-9_\/\.-]/, '_') + domain_name = row['domain_id'].to_s.gsub(/[^a-zA-Z0-9_\/\.-]/, '_') + domains[domain_name] ||= {} + domains[domain_name]["folder"] = folder + domains[domain_name]["options"] ||= [] + + hash = {} + hash['value'] = row.delete('value').to_s + hash['display_name'] = row.delete('display_name').to_s + hash['description'] = row.delete('description').to_s + + domains[domain_name]["options"] << hash + end + + domains.each do |domain_name, domain_hash| + folder = domain_hash["folder"] + FileUtils.mkpath folder + + file_name = File.join(folder, domain_name.downcase + '.json') + + File.open(file_name, 'w') do |file| + file.write(JSON.pretty_generate(domain_hash["options"]) + "\n") + end + puts " create".colorize( :green ) + " #{file_name}" + end + +end + +def additional_csv_info + "\n\nFor additional information on specifying CSV column headers before import see:\n\n " + "https://github.com/sleepepi/spout#generate-a-new-repository-from-an-existing-csv-file".colorize( :light_cyan ) + "\n\n" end