lib/pets/phen2reg_methods.rb in pets-0.1.4 vs lib/pets/phen2reg_methods.rb in pets-0.2.3

- old
+ new

@@ -225,208 +225,80 @@ merged_regions << ref_reg return merged_regions end # def hpo_quality_control(prediction_data, hpo_metadata_file, information_coefficient_file) -def hpo_quality_control(prediction_data, hpo_metadata, hpo_child_metadata, hpos_ci_values) +def hpo_quality_control(prediction_data, hpos_ci_values, hpo) characterised_hpos = [] ##information_coef_file= hpo_code, ci ##prediction_data = [hpo1, hpo2, hpo3...] - ##hpo_metadata = {hpo_code => [phenotype, relations]}, relations = [hpo_code_relation, name_relation] - # hpo_metadata = load_hpo_metadata(hpo_metadata_file) - # hpo_child_metadata = inverse_hpo_metadata(hpo_metadata) # hpos_ci_values = load_hpo_ci_values(information_coefficient_file) prediction_data.each do |hpo_code| - tmp = [] + # names, rejected = hpo.translate_codes2names([hpo_code]) + names, rejected = hpo.translate_ids([hpo_code]) + tmp = [names.first, hpo_code] # col hpo name, col hpo code ci = hpos_ci_values[hpo_code] - main_hpo_code, hpo_name, relations = hpo_metadata[hpo_code] - tmp << hpo_name # col hpo name - tmp << hpo_code # col hpo code unless ci.nil? # col exists? and ci values - tmp << "yes" - tmp << ci + tmp.concat(["yes", ci]) else - tmp << "no" - tmp << "-" + tmp.concat(["no", "-"]) end - parent = check_parents(relations, prediction_data, hpo_metadata) - parent << "-" if parent.empty? - tmp << parent # col parents - childs = hpo_child_metadata[hpo_code] - if childs.nil? - childs = [] + # parent = prediction_data & hpo.get_parents(hpo_code) + parent = prediction_data & hpo.get_ancestors(hpo_code, true) + if parent.empty? + parent << "-" else - childs = childs + # n, r = hpo.translate_codes2names(parent) + n, r = hpo.translate_ids(parent) + parent = parent.zip(n) # Combine code ids with hpo names end - tmp << childs + tmp << parent # col parents + specific_childs = hpo.get_childs_table([hpo_code], true) + tmp << specific_childs.first.last characterised_hpos << tmp end - # return characterised_hpos, hpo_metadata return characterised_hpos end -def check_parents(relations, prediction_data, hpo_metadata) - parent = [] - relations.each do |par_hpo_code, par_hpo_name| - if prediction_data.include?(par_hpo_code) - parent << [par_hpo_code, par_hpo_name] - end - grand_par_hpo = hpo_metadata[par_hpo_code] - if !grand_par_hpo.nil? - parent.concat(check_parents(grand_par_hpo.last, prediction_data, hpo_metadata)) - end - end - return parent +def calculate_hpo_recovery_and_filter(adjacent_regions_joined, patient_original_phenotypes, predicted_hpo_percentage, min_hpo_recovery_percentage, patient_number) + records_to_delete = [] + counter = 0 + adjacent_regions_joined.each do |chr, start, stop, hpo_list, association_values, score| + hpo_coincidences = patient_original_phenotypes & hpo_list + original_hpo_recovery_percentage = hpo_coincidences.length / patient_original_phenotypes.length.to_f * 100 + records_to_delete << counter if original_hpo_recovery_percentage < min_hpo_recovery_percentage + query = predicted_hpo_percentage[patient_number] + if query.nil? + predicted_hpo_percentage[patient_number] = [original_hpo_recovery_percentage] + else + query << original_hpo_recovery_percentage + end + counter += 1 + end + records_to_delete.reverse_each do |record_number| + adjacent_regions_joined.delete_at(record_number) + end end -def report_data(characterised_hpos, hpo_associated_regions, html_file, hpo_metadata, genes_with_kegg_data, pathway_stats) +def report_data(characterised_hpos, hpo_associated_regions, html_file, hpo, genes_with_kegg_data, pathway_stats) container = {:characterised_hpos => characterised_hpos, :merged_regions => hpo_associated_regions, - :hpo_metadata => hpo_metadata, + :hpo => hpo, :genes_with_kegg_data => genes_with_kegg_data, :pathway_stats => pathway_stats } template = File.open(File.join(REPORT_FOLDER, 'patient_report.erb')).read report = Report_html.new(container, 'Patient HPO profile summary') report.build(template) report.write(html_file) end -############################################################################## -############################################################################## -##### OLD CODE FOR JOIN REGIONS BY BORDERS -############################################################################## -############################################################################## - -# def sorting_regions_by_shared_hpos(region2hpo) -# #if regions share the same hpos, sort regions from lowest to highest -# #this method returns an array for its use in cluster_regions_by_common_hpos method -# arr_region2hpo = [] -# region2hpo.each do |region, hpos| -# arr_region2hpo << [region, hpos.sort] -# end -# arr_region2hpo.sort!{|r1, r2| r1.last <=> r2.last} -# # # arr_region2hpo = [[1.1.A.1, [hpo1, hpo2, hpo3]], [1.2.A.1, [hpo1, hpo2, hpo3]]...] -# return arr_region2hpo -# end - -# def cluster_regions_by_common_hpos(arr_region2hpo) -# #method for grouping hpos within different locations -# regions_by_hpos = {} -# last_hpos = [] -# regions = [] -# all_regions = [] -# arr_region2hpo.each do |region, hpos| -# all_regions << region -# if last_hpos == hpos -# regions << region -# else -# regions_by_hpos[last_hpos] = regions if !last_hpos.empty? -# regions = [region] -# end -# last_hpos = hpos -# end -# regions_by_hpos[last_hpos] = regions -# #puts regions_by_hpos.inspect -# # #regions_by_hpos = {[hpo1, hpo2, hpo3] => [1.1.A.1, 1.2.A.4, 1.3.A.12]...} -# return regions_by_hpos -# end - -# def prepare_regions_for_profile_analysis(region2hpo, regionAttributes, association_scores, weight_style) -# # region2hpo = {region => [hpo1, hpo2...]} -# # regionAttributes = {region => [chr, start, stop, patients_number, region_length, region]} -# hpo_associated_regions = [] -# arr_region2hpo = sorting_regions_by_shared_hpos(region2hpo) -# regions_by_hpos = cluster_regions_by_common_hpos(arr_region2hpo) -# regions_by_hpos.each do |hpos_list, regions| -# regionIDs = [] -# regions_lengths = [] -# patients_numbers = [] -# region_attributes = regions.map { |region| regionAttributes[region] } -# region_attributes.each do |attributes| -# cur_chr, cur_start, cur_stop, cur_patients_number, cur_region_length, cur_regionID = attributes -# add_region(hpo_associated_regions, cur_chr, cur_start, cur_stop, hpos_list, [cur_regionID], association_scores, [cur_region_length], [cur_patients_number], weight_style) -# end -# end -# #puts hpo_associated_regions.inspect -# return hpo_associated_regions -# end - -# def join_regions_by_borders(region2hpo, regionAttributes, association_scores, weight_style) -# # region2hpo = {region => [hpo1, hpo2...]} -# # regionAttributes = {region => [chr, start, stop, patients_number, region_length, region]} -# joined_regions_by_borders = [] -# arr_region2hpo = sorting_regions_by_shared_hpos(region2hpo) -# regions_by_hpos = cluster_regions_by_common_hpos(arr_region2hpo) -# regions_by_hpos.each do |hpos_list, regions| -# regionIDs = [] -# regions_lengths = [] -# patients_numbers = [] -# region_attributes = regions.map { |region| regionAttributes[region] } -# region_attributes.sort! { |r1, r2| [r1[0], r1[1]] <=> [r2[0], r2[1]] } -# tmp_chr = nil -# tmp_start = nil -# tmp_stop = nil -# region_attributes.each_with_index do |attributes, counter| -# break if counter + 1 == region_attributes.length -# cur_chr, cur_start, cur_stop, cur_patients_number, cur_region_length, cur_regionID = attributes -# next_chr, next_start, next_stop, next_patients_number, next_region_length, next_regionID = region_attributes[counter + 1] -# if cur_chr == next_chr -# if cur_stop == next_start || cur_stop == next_start + 1 -# tmp_chr = cur_chr -# tmp_start = cur_start if tmp_start.nil? -# tmp_stop = cur_stop -# else -# add_region(joined_regions_by_borders, tmp_chr, tmp_start, tmp_stop, hpos_list, regionIDs, association_scores, regions_lengths, patients_numbers, weight_style) -# tmp_chr = nil -# tmp_start = nil -# tmp_stop = nil -# end -# else -# add_region(joined_regions_by_borders, tmp_chr, tmp_start, tmp_stop, hpos_list, regionIDs, association_scores, regions_lengths, patients_numbers, weight_style) -# tmp_chr = nil -# tmp_start = nil -# tmp_stop = nil -# end -# regionIDs << cur_regionID if regionIDs.empty? -# regionIDs << next_regionID -# regions_lengths << cur_region_length if regions_lengths.empty? -# regions_lengths << next_region_length -# patients_numbers << cur_patients_number if patients_numbers.empty? -# patients_numbers << next_patients_number -# end -# add_region(joined_regions_by_borders, tmp_chr, tmp_start, tmp_stop, hpos_list, regionIDs, association_scores, regions_lengths, patients_numbers, weight_style) -# end -# #puts joined_regions_by_borders.inspect -# return joined_regions_by_borders -# end - -# def add_region(hpo_associated_regions, tmp_chr, tmp_start, tmp_stop, hpos_list, regionIDs, association_scores, region_lengths, patients_numbers, weight_style) -# #region_lengths = number of regions that have the same HPOs -# unless tmp_chr.nil? && tmp_start.nil? && tmp_stop.nil? -# association_values_by_region = regionIDs.map {|r| association_scores[r]} -# weighted_association_scores = [] -# hpos_list.each do |hpo| -# scores = association_values_by_region.map{|hpo_scores| hpo_scores[hpo] } -# weighted_score = 0 -# weight = 0 -# if scores.length == 1 -# weighted_score = scores.first -# weight = 1 -# else -# scores.each_with_index do |s, i| -# if weight_style == 'double' -# weighted_score += s * region_lengths[i] * patients_numbers[i] -# weight += region_lengths[i] * patients_numbers[i] -# elsif weight_style == 'simple' -# weighted_score += s * region_lengths[i] -# weight += region_lengths[i] -# else -# abort("Invalid weight method: #{weight_style}") -# end -# end -# end -# weighted_association_scores << weighted_score/weight -# end -# hpo_associated_regions << [tmp_chr, tmp_start, tmp_stop, hpos_list, weighted_association_scores] -# end -# end +def save_patient_matrix(output, patient_hpo_profile, regionAttributes, hpo_region_matrix) + File.open(output, "w") do |f| + f.puts "Region\t#{patient_hpo_profile.join("\t")}" + regionAttributes_array = regionAttributes.values + hpo_region_matrix.each_with_index do |association_values, i| + chr, start, stop = regionAttributes_array[i] + f.puts "#{chr}:#{start}-#{stop}\t#{association_values.join("\t")}" + end + end +end \ No newline at end of file