bin/reg2phen.rb in pets-0.1.4 vs bin/reg2phen.rb in pets-0.2.3
- old
+ new
@@ -1,116 +1,179 @@
#! /usr/bin/env ruby
#DECIPHER predictor system, using data from cross validation
#data2predict = file to predict
#training_file.txt = file with training data (association values and hpo codes).
-require 'optparse'
-require File.join(File.dirname(__FILE__), '..', 'lib', 'pets', 'generalMethods.rb')
-##########################
-#METHODS
-##########################
+REPORT_FOLDER=File.expand_path(File.join(File.dirname(__FILE__), '..', 'templates'))
+ROOT_PATH = File.dirname(__FILE__)
+$: << File.expand_path(File.join(ROOT_PATH, '..', 'lib', 'pets'))
+require 'optparse'
+require 'generalMethods.rb'
+require 'semtools'
+require 'reg2phen_methods'
-# def predict_patient(prediction_file, training_set, threshold)
-# predicted_hpos = []
-# File.open(prediction_file).each do |line|
-# line.chomp!
-# chr, pt_start, pt_stop = line.split("\t")
-# pt_start = pt_start.to_i
-# pt_stop = pt_stop.to_i
-# query = training_set[chr]
-# association_value = query.last.last
-# next if association_value.to_f < threshold
-# if !query.nil?
-# query.each do |hpo_start, hpo_stop, nodeID, hpo_code, association_value|
-# if (hpo_stop > pt_start && hpo_stop <= pt_stop) ||
-# (hpo_start >= pt_start && hpo_start < pt_stop) ||
-# (hpo_start <= pt_start && hpo_stop >= pt_stop) ||
-# (hpo_start > pt_start && hpo_stop < pt_stop)
-# if association_value >= threshold
-# predicted_hpos << [chr, pt_start, pt_stop].concat([hpo_code, association_value])
-# end
-# end
-# end
-# end
-# end
-# # STDERR.puts predicted_hpos.inspect
-# return predicted_hpos
-# end
-
-def predict_patient(prediction_file, training_set, threshold)
- File.open(prediction_file).each do |line|
- line.chomp!
- # fields = line.split("\t")
- chr, pt_start, pt_stop = line.split("\t")
- pt_start = pt_start.to_i
- pt_stop = pt_stop.to_i
- #associationValue = fields[1]
- # next if associationValue < threshold
- query = training_set[chr]
- # STDERR.puts query.inspect
- if !query.nil?
- query.each do |hpo_start, hpo_stop, nodeID, hpo_code, association_score|
- # Process.exit
- if (hpo_stop > pt_start && hpo_stop <= pt_stop) ||
- (hpo_start >= pt_start && hpo_start < pt_stop) ||
- (hpo_start <= pt_start && hpo_stop >= pt_stop) ||
- (hpo_start > pt_start && hpo_stop < pt_stop)
- puts [chr, pt_start, pt_stop].concat([hpo_code, association_score, hpo_start, hpo_stop]).join("\t") if association_score.to_f >= threshold
- end
- end
- end
- end
-end
-
-
-
##########################
#OPT-PARSER
##########################
options = {}
OptionParser.new do |opts|
opts.banner = "Usage: #{__FILE__} [options]"
- options[:training_file] = nil
- #chr\tstart\tstop\tphenotype\tassociation_value
- opts.on("-t", "--training_file PATH", "Input training file, with association values") do |training_path|
- options[:training_file] = training_path
+ options[:hpo_file] = nil
+ opts.on("-b", "--hpo_file PATH", "Input HPO obo file") do |hpo_file|
+ options[:hpo_file] = hpo_file
end
- options[:prediction_file] = nil
- #chr\tstart\tstop
- opts.on("-p", "--prediction_file PATH", "Input file with regions to predict") do |input_path|
- options[:prediction_file] = input_path
+ options[:patbyclust_file] = nil
+ opts.on("-c", "--patbyclust_file PATH", "Input patients by cluster file") do |data|
+ options[:patbyclust_file] = data
end
+ options[:html_file] = 'reg2phen_results.html'
+ opts.on("-F", "--html_file PATH", "Output HTML file") do |data|
+ options[:html_file] = data
+ end
+
options[:association_limit] = 0
opts.on("-l", "--association_limit FLOAT", "Threshold for association values") do |association_limit|
- options[:association_limit] = association_limit.to_f
+ options[:association_limit] = association_limit.to_f
end
+ options[:input_genes] = false
+ opts.on("-g", "--input_genes", "Input list of genes instead of chromosomic coordinates") do
+ options[:input_genes] = true
+ end
+
+ options[:html_reporting] = false
+ opts.on("-H", "--html_reporting", "Set for HTML report") do
+ options[:html_reporting] = true
+ end
+
options[:output_path] = "output.txt"
opts.on("-o", '--output_path PATH', 'Output path for overlapping patient file') do |output_path|
- options[:output_path] = output_path
+ options[:output_path] = output_path
end
+ options[:output_file] = 'reg2phen_predictions.txt'
+ opts.on("-O", "--output_file PATH", "Output file with patient data") do |data|
+ options[:output_file] = data
+ end
+
+ options[:prediction_file] = nil
+ #chr\tstart\tstop
+ opts.on("-p", "--prediction_file PATH", "Input file with regions to predict. Coordinates must be splitted by tabs") do |input_path|
+ options[:prediction_file] = input_path
+ end
+
+ options[:transform_pvalues] = false
+ opts.on("-P", "--transform_pvalues", "Set to transform association values into P-values (only for HyI associations)") do
+ options[:transform_pvalues] = true
+ end
+
+ options[:top_results] = 10
+ opts.on("-r", "--top_results FLOAT", "Threshold for association values") do |top_results|
+ options[:top_results] = top_results.to_i
+ end
+
+ options[:training_file] = nil
+ #chr\tstart\tstop\tphenotype\tassociation_value
+ opts.on("-t", "--training_file PATH", "Input training file, with association values") do |training_path|
+ options[:training_file] = training_path
+ end
+
opts.on_tail("-h", "--help", "Show this message") do
puts opts
exit
end
end.parse!
+##########################
+#PATHS
+##########################
+all_paths = {code: File.join(File.dirname(__FILE__), '..')}
+all_paths[:external_data] = File.join(all_paths[:code], 'external_data')
+all_paths[:gene_data] = File.join(all_paths[:external_data], 'gene_data.gz')
+all_paths[:biosystems_gene] = File.join(all_paths[:external_data], 'biosystems_gene.gz')
+all_paths[:biosystems_info] = File.join(all_paths[:external_data], 'bsid2info.gz')
+all_paths[:gene_data_with_pathways] = File.join(all_paths[:external_data], 'gene_data_with_pathways.gz')
+all_paths[:gene_location] = File.join(all_paths[:external_data], 'gene_location.gz')
+
##########################
#MAIN
##########################
+output_folder = File.dirname(options[:output_file])
+# #detailed_profile_evaluation_file = File.join(output_folder, 'detailed_hpo_profile_evaluation.csv')
+gene_location, genes_with_kegg = get_and_parse_external_data(all_paths)
+# hpo = Ontology.new
+# hpo.load_data(options[:hpo_file])
+hpo = Ontology.new(file: options[:hpo_file], load_file: true)
+#hpo = nil
+
training_set = load_training_file4regions(options[:training_file])
-predict_patient(options[:prediction_file], training_set, options[:association_limit])
-# predicted_hpos = predict_patient(options[:prediction_file], training_set, options[:association_limit])
-# handler = File.open(options[:output_path], 'w')
-# predicted_hpos.each do |predictions|
-# handler.puts predictions.join("\t")
-# end
-# handler.close
+
+genes_dictionary = {}
+genes_dictionary = generate_genes_dictionary(gene_location, genes_with_kegg) if options[:input_genes]
+
+multiple_regions = []
+File.open(options[:prediction_file]).each do |line|
+ line.chomp!
+ fields = line.split("\t")
+ if options[:input_genes]
+ fields[2] = fields[2].to_i
+ fields[3] = fields[3].to_i
+ else
+ fields[1] = fields[1].to_i
+ fields[2] = fields[2].to_i
+ end
+ multiple_regions << fields
+end
+
+results = predict_patient(
+ multiple_regions,
+ training_set,
+ options[:association_limit],
+ options[:transform_pvalues],
+ options[:input_genes],
+ genes_dictionary
+)
+
+#translate_hpos_in_results(results, hpo)
+
+results.each do |pred, values|
+ values.sort! do |a, b|
+ if !options[:transform_pvalues]
+ b[4] <=> a[4]
+ else
+ a[4] <=> b[4]
+ end
+ end
+end
+
+File.open(options[:output_path], 'w') do |f|
+ results.each do |k, v|
+ f.puts "Results for #{k}:"
+ v.each_with_index do |i, c|
+ f.puts i.join("\t") if c < options[:top_results]
+ end
+ end
+end
+
+######################
+# .ERB TEMPLATE (HTML) DEVELOPING
+######################
+patient_results_table = parse_patient_results(results)
+table_uniq_hpos = patient_results_table.map{|a| a[5]}.uniq
+header = table_uniq_hpos.shift
+table_uniq_hpos = hpo.clean_profile(table_uniq_hpos.map{|a| a.to_sym})
+table_uniq_hpos.unshift(header)
+table_uniq_hpos.map!{|a| [a]}
+container = {
+ :table1 => patient_results_table,
+ :hpo => hpo,
+ :table_uniq_hpos => table_uniq_hpos
+}
+report_data(container, options[:html_file]) if options[:html_reporting]
\ No newline at end of file