bin/paco_translator.rb in pets-0.1.4 vs bin/paco_translator.rb in pets-0.2.3
- old
+ new
@@ -5,37 +5,56 @@
HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
require 'generalMethods.rb'
require 'optparse'
+require 'semtools'
###############
#METHODS
###############
# [review] Removed version (pets-0.1.4): translate_codes_to_terms.
# For every patient record it shifted the first field off the record (mutating
# the record), split that field on '|', looked each code up as
# hpo_storage[code][1], appended the '|'-joined results as the record's last
# field, and returned a new hash keyed by patient ID.
-def translate_codes_to_terms(patient_data, hpo_storage)
- patients_with_hpo_names = {}
- hpo_names = []
- patient_data.each do |patientID, hpos_and_cnvs|
- hpos = hpos_and_cnvs.shift.split('|')
- hpos.each do |hpo|
- hpo_names << hpo_storage[hpo][1]
- end
- hpos_and_cnvs << hpo_names.join('|')
- patients_with_hpo_names[patientID] = hpos_and_cnvs
- hpo_names = []
- end
- return patients_with_hpo_names
# [review] Added version (pets-0.2.3): translate_hpo.
# Translates each patient's HPO list in place and removes patients whose list
# ends up empty.
#   patient_data - iterated as (patientID, patient_record) pairs and must support
#                  delete(key); slot 0 of each record is the HPO list.
#   hpo          - object responding to translate_ids and translate_names; both
#                  are destructured as a [translated, rejected] pair
#                  (presumably a semtools Ontology - confirm against the gem).
#   translate    - 'names' calls translate_ids, 'codes' calls translate_names.
# Returns the value of the final `each`, i.e. the array of rejected patient IDs;
# the call site visible in this diff ignores it.
+def translate_hpo(patient_data, hpo, translate)
+ reject_pats = []
+ patient_data.each do |patientID, patient_record|
# chr, start and stop are destructured but not used in this method.
+ hpos, chr, start, stop = patient_record
+ if translate == 'names'
+ # hpos, rejected = hpo.translate_codes2names(hpos)
# NOTE(review): `rejected` is captured here but never reported, unlike the
# 'codes' branch below.
+ hpos, rejected = hpo.translate_ids(hpos)
+ elsif translate =='codes'
+ # hpos, rejected = hpo.translate_names2codes(hpos)
+ hpos, rejected = hpo.translate_names(hpos)
+ STDERR.puts(" The ontology names '#{rejected.join(',')}' were not found") if !rejected.empty?
+ end
# NOTE(review): any other `translate` value falls through both branches and the
# HPO list is left untranslated without a warning.
+ if hpos.empty?
+ reject_pats << patientID
+ else
+ patient_record[0] = hpos
+ end
+ end
# Deletion is deferred until iteration has finished, so patient_data is not
# modified while it is being traversed.
+ reject_pats.each do | rj_pat|
+ patient_data.delete(rj_pat)
+ end
end
# [review] Removed version (pets-0.1.4): wrote one line per patient as the ID
# followed by the record fields joined with tabs. Every "_i" + single digit
# occurrence was stripped from the ID, wherever it appeared.
-def save_translated_file(patients_with_hpo_names, output_file)
- handler = File.open(output_file, 'w')
- patients_with_hpo_names.each do |id, data|
- patientID = id.gsub(/_i[0-9]/,'')
- handler.puts "#{patientID}\t#{data.join("\t")}"
- end
- handler.close
# [review] Added version (pets-0.2.3): writes the cohort to output_file in one
# of two column layouts selected by `mode`.
#   patients_with_hpo_names - iterated as (id, patient_record) pairs; each record
#                             is destructured as hpos, chr, start, stop.
#   output_file             - path opened with 'w' (existing content is truncated).
#   mode                    - 'default': id, phenotypes, chr, start, stop (no header)
#                             'paco':    header line, then id, chr, start, stop, phenotypes
#                             anything else aborts the program.
# The block form of File.open closes the file when the block exits.
+def save_translated_file(patients_with_hpo_names, output_file, mode)
+ File.open(output_file, 'w') do |f|
+ if mode == 'paco'
+ f.puts "patient_id\tchr\tstart\tstop\tphenotypes"
+ end
+ patients_with_hpo_names.each do |id, patient_record|
+ hpos, chr, start, stop = patient_record
# Strips a trailing "_i<digits>" suffix from the ID. `$` anchors at end of
# line, not end of string.
+ id = id.gsub(/_i[0-9]+$/,'')
# Phenotypes are always joined with '|' here; the separator is hard-coded.
+ if mode == 'default'
+ f.puts "#{id}\t#{hpos.join('|')}\t#{[chr, start, stop].join("\t")}"
+ elsif mode == 'paco'
+ f.puts "#{id}\t#{[chr, start, stop].join("\t")}\t#{hpos.join('|')}"
+ else
# NOTE(review): the mode is validated only here, inside the per-patient loop,
# so the output file has already been truncated when this fires, and an invalid
# mode goes unnoticed when there are no patients. The message also contains a
# stray ']'.
+ abort('Wrong save_mode] option, please try default or paco')
+ end
+ end
+ end
end
###############
#OPTIONS
###############
@@ -58,11 +77,11 @@
opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
options[:end_col] = data
end
options[:header] = true
- opts.on("-H", "--header", "Set if the file has a line header. Default true") do
+ opts.on("-H", "--header", "File has a line header. Default true") do
options[:header] = false
end
options[:output_file] = 'paco_file_with_hpo_names.txt'
opts.on("-o", "--output_file PATH", "Output paco file with HPO names") do |data|
@@ -74,29 +93,43 @@
options[:input_file] = value
end
options[:hpo_col] = nil
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
- options[:hpo_col] = data
+ options[:hpo_col] = data
end
options[:start_col] = nil
opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
- options[:start_col] = data
+ options[:start_col] = data
end
+ options[:hpo_separator] = '|'
+ opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
+ options[:hpo_separator] = data
+ end
+ options[:save_mode] = 'default'
+ opts.on("-m", "--save_mode STRING", "Set output data mode") do |data|
+ options[:save_mode] = data
+ end
+
+ options[:translate] = nil
+ opts.on("-t", "--translate STRING", "Set 'names' to translate from hpo codes to names or set 'codes' to translate from hpo names to codes. By default, ther is not translation") do |data|
+ options[:translate] = data
+ end
end.parse!
-
###############
#MAIN
###############
# [review] Added lines: the ontology path is taken from the `hpo_file`
# environment variable, falling back to the HPO_FILE constant when it is unset.
+hpo_file = ENV['hpo_file']
+hpo_file = HPO_FILE if hpo_file.nil?
# [review] Removed lines (pets-0.1.4): always loaded the ontology from HPO_FILE,
# always translated codes to names, and ended with an explicit Process.exit.
-hpo_storage = load_hpo_file(HPO_FILE)
-patient_data, $patient_number = load_patient_cohort(options)
-patients_with_hpo_names = translate_codes_to_terms(patient_data, hpo_storage)
-
-save_translated_file(patients_with_hpo_names, options[:output_file])
-
-
-Process.exit
# [review] Added lines (pets-0.2.3).
# NOTE(review): load_patient_cohort is defined outside this view. The old call
# unpacked two values and this one assigns a single variable - confirm the new
# helper returns only the patient collection.
+patient_data = load_patient_cohort(options)
# Translation is optional: when --translate was not given, no Ontology is built
# and the cohort is written untranslated.
+if !options[:translate].nil?
+ # hpo = Ontology.new
+ # hpo.load_data(hpo_file)
# Ontology is presumably provided by the semtools require added above - confirm.
+ hpo = Ontology.new(file: hpo_file, load_file: true)
+ translate_hpo(patient_data, hpo, options[:translate])
+end
+save_translated_file(patient_data, options[:output_file], options[:save_mode])