lib/spec_id/protein_summary.rb in mspire-0.2.2 vs lib/spec_id/protein_summary.rb in mspire-0.2.4
- old
+ new
@@ -4,10 +4,11 @@
require 'hash_by'
require 'optparse'
require 'ostruct'
require 'spec_id'
require 'spec_id/precision'
+require 'gi'
#############################################################
# GLOBALS:
PRECISION_PROGRAM_BASE = 'precision'
DEF_PREFIX = "INV_"
@@ -208,17 +209,30 @@
#### #readable_previous_fppr_rate_percent = sprintf("%.2f", previous_fppr_rate_percent)
# returns a string of the table rows
# false_positive_rate (give as a %) is the cutoff mark
# returns the number of proteins at the desired_fppr (if given)
- def table_rows(uniq_prots, prefix, false_positive_rate_percent, num_cols, desired_fppr, actual_percent_fp, peptide_count_filename=nil)
+ def table_rows(uniq_prots, prefix, false_positive_rate_percent, num_cols, desired_fppr, actual_percent_fp, annotations=nil, peptide_count_filename=nil)
prot_cnt = 0
+ an_cnt = 0
+
uniq_prots.map do |prot|
tr do
prot_cnt += 1
gi = accession(prot._protein_name)
- tds([prot_cnt, prot._probability, ref_html(gi, prot._protein_name), prot.annotation.first._protein_description, prot._percent_coverage, peptide_cell(prot_cnt, prot._unique_stripped_peptides.split('+')), prot._total_number_peptides, prot._pct_spectrum_ids])
+
+ if annotations
+ protein_description = annotations[an_cnt]
+ an_cnt += 1
+ else
+ if prot.annotation.size > 0
+ protein_description = prot.annotation.first._protein_description
+ else
+ protein_description = 'NA'
+ end
+ end
+ tds([prot_cnt, prot._probability, ref_html(gi, prot._protein_name), protein_description, prot._percent_coverage, peptide_cell(prot_cnt, prot._unique_stripped_peptides.split('+')), prot._total_number_peptides, prot._pct_spectrum_ids])
end
end.join
end
def print_html_pieces(file, *pieces)
@@ -308,12 +322,19 @@
filtered_sorted_prots = filtered_sorted_prots[0,num_prots]
end
output_peptide_counts_file(filtered_sorted_prots, opt.peptide_count) if opt.peptide_count
+ # get an array of annotations (or nil if no option)
+ annotations =
+ if opt.get_annotation
+ gis = filtered_sorted_prots.map {|prot| accession(prot._protein_name) }
+ GI.gi2annot(gis)
+ end
+
table_string = table do
- tr{theaders} + table_rows(filtered_sorted_prots, opt.f, actual_percent_fp, num_cols, opt.c.to_f, actual_percent_fp, opt.peptide_count)
+ tr{theaders} + table_rows(filtered_sorted_prots, opt.f, actual_percent_fp, num_cols, opt.c.to_f, actual_percent_fp, annotations, opt.peptide_count)
end
er_info = opt.precision ? error_info(file) : ""
html_pieces = [outfn, header, fppr_output_as_html, er_info, file_info(file), protproph_script_info, num_prots_html, table_string, trailer]
print_html_pieces(*html_pieces)
end # proph_output
@@ -408,9 +429,11 @@
op.separator(" type '#{PRECISION_PROGRAM_BASE}.rb' for details")
op.separator ""
op.separator "Specific to ProteinProphet (with no concatenated DB):"
op.on("-c", "--cutoff percent", "false positive predictive rate (FPPR)% for given cutoff") {|v| opt.c = v }
op.on("--cut_at percent", "only reports proteins within FPPR %") {|v| opt.cut_at = v }
+ op.on("--get_annotation", "retrieves annotation by gi code") {|v| opt.get_annotation = v}
+ op.separator " (use if your proteins have gi's but no annotation) "
end
opts.parse!(argv)
if argv.size < 1