bin/paco_translator.rb in pets-0.1.4 vs bin/paco_translator.rb in pets-0.2.3

- old
+ new

@@ -5,37 +5,56 @@ HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo') $: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets')) require 'generalMethods.rb' require 'optparse' +require 'semtools' ############### #METHODS ############### -def translate_codes_to_terms(patient_data, hpo_storage) - patients_with_hpo_names = {} - hpo_names = [] - patient_data.each do |patientID, hpos_and_cnvs| - hpos = hpos_and_cnvs.shift.split('|') - hpos.each do |hpo| - hpo_names << hpo_storage[hpo][1] - end - hpos_and_cnvs << hpo_names.join('|') - patients_with_hpo_names[patientID] = hpos_and_cnvs - hpo_names = [] - end - return patients_with_hpo_names +def translate_hpo(patient_data, hpo, translate) + reject_pats = [] + patient_data.each do |patientID, patient_record| + hpos, chr, start, stop = patient_record + if translate == 'names' + # hpos, rejected = hpo.translate_codes2names(hpos) + hpos, rejected = hpo.translate_ids(hpos) + elsif translate =='codes' + # hpos, rejected = hpo.translate_names2codes(hpos) + hpos, rejected = hpo.translate_names(hpos) + STDERR.puts(" The ontology names '#{rejected.join(',')}' were not found") if !rejected.empty? + end + if hpos.empty? + reject_pats << patientID + else + patient_record[0] = hpos + end + end + reject_pats.each do | rj_pat| + patient_data.delete(rj_pat) + end end -def save_translated_file(patients_with_hpo_names, output_file) - handler = File.open(output_file, 'w') - patients_with_hpo_names.each do |id, data| - patientID = id.gsub(/_i[0-9]/,'') - handler.puts "#{patientID}\t#{data.join("\t")}" - end - handler.close +def save_translated_file(patients_with_hpo_names, output_file, mode) + File.open(output_file, 'w') do |f| + if mode == 'paco' + f.puts "patient_id\tchr\tstart\tstop\tphenotypes" + end + patients_with_hpo_names.each do |id, patient_record| + hpos, chr, start, stop = patient_record + id = id.gsub(/_i[0-9]+$/,'') + if mode == 'default' + f.puts "#{id}\t#{hpos.join('|')}\t#{[chr, start, stop].join("\t")}" + elsif mode == 'paco' + f.puts "#{id}\t#{[chr, start, stop].join("\t")}\t#{hpos.join('|')}" + else + abort('Wrong save_mode] option, please try default or paco') + end + end + end end ############### #OPTIONS ############### @@ -58,11 +77,11 @@ opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data| options[:end_col] = data end options[:header] = true - opts.on("-H", "--header", "Set if the file has a line header. Default true") do + opts.on("-H", "--header", "File has a line header. Default true") do options[:header] = false end options[:output_file] = 'paco_file_with_hpo_names.txt' opts.on("-o", "--output_file PATH", "Output paco file with HPO names") do |data| @@ -74,29 +93,43 @@ options[:input_file] = value end options[:hpo_col] = nil opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data| - options[:hpo_col] = data + options[:hpo_col] = data end options[:start_col] = nil opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data| - options[:start_col] = data + options[:start_col] = data end + options[:hpo_separator] = '|' + opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data| + options[:hpo_separator] = data + end + options[:save_mode] = 'default' + opts.on("-m", "--save_mode STRING", "Set output data mode") do |data| + options[:save_mode] = data + end + + options[:translate] = nil + opts.on("-t", "--translate STRING", "Set 'names' to translate from hpo codes to names or set 'codes' to translate from hpo names to codes. By default, ther is not translation") do |data| + options[:translate] = data + end end.parse! - ############### #MAIN ############### +hpo_file = ENV['hpo_file'] +hpo_file = HPO_FILE if hpo_file.nil? -hpo_storage = load_hpo_file(HPO_FILE) -patient_data, $patient_number = load_patient_cohort(options) -patients_with_hpo_names = translate_codes_to_terms(patient_data, hpo_storage) - -save_translated_file(patients_with_hpo_names, options[:output_file]) - - -Process.exit +patient_data = load_patient_cohort(options) +if !options[:translate].nil? + # hpo = Ontology.new + # hpo.load_data(hpo_file) + hpo = Ontology.new(file: hpo_file, load_file: true) + translate_hpo(patient_data, hpo, options[:translate]) +end +save_translated_file(patient_data, options[:output_file], options[:save_mode])