lib/spec_id/protein_summary.rb in mspire-0.2.2 vs lib/spec_id/protein_summary.rb in mspire-0.2.4

- old
+ new

@@ -4,10 +4,11 @@ require 'hash_by' require 'optparse' require 'ostruct' require 'spec_id' require 'spec_id/precision' +require 'gi' ############################################################# # GLOBALS: PRECISION_PROGRAM_BASE = 'precision' DEF_PREFIX = "INV_" @@ -208,17 +209,30 @@ #### #readable_previous_fppr_rate_percent = sprintf("%.2f", previous_fppr_rate_percent) # returns a string of the table rows # false_positive_rate (give as a %) is the cutoff mark # returns the number of proteins at the desired_fppr (if given) - def table_rows(uniq_prots, prefix, false_positive_rate_percent, num_cols, desired_fppr, actual_percent_fp, peptide_count_filename=nil) + def table_rows(uniq_prots, prefix, false_positive_rate_percent, num_cols, desired_fppr, actual_percent_fp, annotations=nil, peptide_count_filename=nil) prot_cnt = 0 + an_cnt = 0 + uniq_prots.map do |prot| tr do prot_cnt += 1 gi = accession(prot._protein_name) - tds([prot_cnt, prot._probability, ref_html(gi, prot._protein_name), prot.annotation.first._protein_description, prot._percent_coverage, peptide_cell(prot_cnt, prot._unique_stripped_peptides.split('+')), prot._total_number_peptides, prot._pct_spectrum_ids]) + + if annotations + protein_description = annotations[an_cnt] + an_cnt += 1 + else + if prot.annotation.size > 0 + protein_description = prot.annotation.first._protein_description + else + protein_description = 'NA' + end + end + tds([prot_cnt, prot._probability, ref_html(gi, prot._protein_name), protein_description, prot._percent_coverage, peptide_cell(prot_cnt, prot._unique_stripped_peptides.split('+')), prot._total_number_peptides, prot._pct_spectrum_ids]) end end.join end def print_html_pieces(file, *pieces) @@ -308,12 +322,19 @@ filtered_sorted_prots = filtered_sorted_prots[0,num_prots] end output_peptide_counts_file(filtered_sorted_prots, opt.peptide_count) if opt.peptide_count + # get an array of annotations (or nil if no option) + annotations = + if opt.get_annotation + gis = filtered_sorted_prots.map {|prot| accession(prot._protein_name) } + GI.gi2annot(gis) + end + table_string = table do - tr{theaders} + table_rows(filtered_sorted_prots, opt.f, actual_percent_fp, num_cols, opt.c.to_f, actual_percent_fp, opt.peptide_count) + tr{theaders} + table_rows(filtered_sorted_prots, opt.f, actual_percent_fp, num_cols, opt.c.to_f, actual_percent_fp, annotations, opt.peptide_count) end er_info = opt.precision ? error_info(file) : "" html_pieces = [outfn, header, fppr_output_as_html, er_info, file_info(file), protproph_script_info, num_prots_html, table_string, trailer] print_html_pieces(*html_pieces) end # proph_output @@ -408,9 +429,11 @@ op.separator(" type '#{PRECISION_PROGRAM_BASE}.rb' for details") op.separator "" op.separator "Specific to ProteinProphet (with no concatenated DB):" op.on("-c", "--cutoff percent", "false positive predictive rate (FPPR)% for given cutoff") {|v| opt.c = v } op.on("--cut_at percent", "only reports proteins within FPPR %") {|v| opt.cut_at = v } + op.on("--get_annotation", "retrieves annotation by gi code") {|v| opt.get_annotation = v} + op.separator " (use if your proteins have gi's but no annotation) " end opts.parse!(argv) if argv.size < 1