bin/paco_translator.rb in pets-0.2.3 vs bin/paco_translator.rb in pets-0.2.4
- old
+ new
@@ -1,63 +1,14 @@
#! /usr/bin/env ruby
ROOT_PATH = File.dirname(__FILE__)
-EXTERNAL_DATA = File.expand_path(File.join(ROOT_PATH, '..', 'external_data'))
-HPO_FILE = File.join(EXTERNAL_DATA, 'hp.obo')
$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
-require 'generalMethods.rb'
require 'optparse'
-require 'semtools'
+require 'pets'
###############
-#METHODS
-###############
-
-def translate_hpo(patient_data, hpo, translate)
- reject_pats = []
- patient_data.each do |patientID, patient_record|
- hpos, chr, start, stop = patient_record
- if translate == 'names'
- # hpos, rejected = hpo.translate_codes2names(hpos)
- hpos, rejected = hpo.translate_ids(hpos)
- elsif translate =='codes'
- # hpos, rejected = hpo.translate_names2codes(hpos)
- hpos, rejected = hpo.translate_names(hpos)
- STDERR.puts(" The ontology names '#{rejected.join(',')}' were not found") if !rejected.empty?
- end
- if hpos.empty?
- reject_pats << patientID
- else
- patient_record[0] = hpos
- end
- end
- reject_pats.each do | rj_pat|
- patient_data.delete(rj_pat)
- end
-end
-
-def save_translated_file(patients_with_hpo_names, output_file, mode)
- File.open(output_file, 'w') do |f|
- if mode == 'paco'
- f.puts "patient_id\tchr\tstart\tstop\tphenotypes"
- end
- patients_with_hpo_names.each do |id, patient_record|
- hpos, chr, start, stop = patient_record
- id = id.gsub(/_i[0-9]+$/,'')
- if mode == 'default'
- f.puts "#{id}\t#{hpos.join('|')}\t#{[chr, start, stop].join("\t")}"
- elsif mode == 'paco'
- f.puts "#{id}\t#{[chr, start, stop].join("\t")}\t#{hpos.join('|')}"
- else
- abort('Wrong save_mode] option, please try default or paco')
- end
- end
- end
-end
-
-###############
#OPTIONS
###############
options = {}
OptionParser.new do |opts|
@@ -66,13 +17,13 @@
options[:chromosome_col] = nil
opts.on("-c", "--chromosome_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the chromosome") do |data|
options[:chromosome_col] = data
end
- options[:pat_id_col] = nil
+ options[:id_col] = nil
opts.on("-d", "--pat_id_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the patient id") do |data|
- options[:pat_id_col] = data
+ options[:id_col] = data
end
options[:end_col] = nil
opts.on("-e", "--end_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the end mutation coordinate") do |data|
options[:end_col] = data
@@ -91,45 +42,51 @@
options[:input_file] = nil
opts.on("-P", "--input_file PATH", "Input file with PACO extension") do |value|
options[:input_file] = value
end
- options[:hpo_col] = nil
+ options[:ont_col] = nil
opts.on("-p", "--hpo_term_col INTEGER/STRING", "Column name if header true or 0-based position of the column with the HPO terms") do |data|
- options[:hpo_col] = data
+ options[:ont_col] = data
end
options[:start_col] = nil
opts.on("-s", "--start_col INTEGER/STRING", "Column name if header is true, otherwise 0-based position of the column with the start mutation coordinate") do |data|
options[:start_col] = data
end
- options[:hpo_separator] = '|'
+ options[:separator] = '|'
opts.on("-S", "--hpo_separator STRING", "Set which character must be used to split the HPO profile. Default '|'") do |data|
- options[:hpo_separator] = data
+ options[:separator] = data
end
- options[:save_mode] = 'default'
+ options[:n_phens] = nil
+ opts.on("--n_phens INTEGER", "Remove records with N or less phenotypes") do |data|
+ options[:n_phens] = data.to_i
+ end
+
+ options[:save_mode] = :default
opts.on("-m", "--save_mode STRING", "Set output data mode") do |data|
- options[:save_mode] = data
+ options[:save_mode] = data.to_sym
end
- options[:translate] = nil
- opts.on("-t", "--translate STRING", "Set 'names' to translate from hpo codes to names or set 'codes' to translate from hpo names to codes. By default, ther is not translation") do |data|
- options[:translate] = data
+ options[:names] = false
+ opts.on("-n", "--hpo_names", "Define if the input HPO are human readable names. Default false") do
+ options[:names] = true
end
+
+ options[:translate] = false
+ opts.on("-t", "--translate", "Set to translate from hpo codes to names. By default, ther is not translation") do
+ options[:translate] = true
+ end
end.parse!
###############
#MAIN
###############
-hpo_file = ENV['hpo_file']
-hpo_file = HPO_FILE if hpo_file.nil?
+hpo_file = !ENV['hpo_file'].nil? ? ENV['hpo_file'] : HPO_FILE
+Cohort.load_ontology(:hpo, hpo_file)
+Cohort.act_ont = :hpo
-patient_data = load_patient_cohort(options)
-if !options[:translate].nil?
- # hpo = Ontology.new
- # hpo.load_data(hpo_file)
- hpo = Ontology.new(file: hpo_file, load_file: true)
- translate_hpo(patient_data, hpo, options[:translate])
-end
-save_translated_file(patient_data, options[:output_file], options[:save_mode])
+patient_data, rejected_hpos, rejected_patients = Cohort_Parser.load(options)
+rejected_patients_by_phen = patient_data.filter_by_term_number(options[:n_phens]) if !options[:n_phens].nil?
+patient_data.save(options[:output_file], options[:save_mode], options[:translate])