lib/spec_id/protein_summary.rb in mspire-0.2.1 vs lib/spec_id/protein_summary.rb in mspire-0.2.2

- old
+ new

@@ -143,13 +143,17 @@ else name end end - def prefix_to_regex(prefix) - if prefix - /^#{Regexp.escape(prefix)}/ + def flag_to_regex(flag, prefix=false) + if flag + if prefix + /^#{Regexp.escape(flag)}/ + else + /#{Regexp.escape(flag)}/ + end else nil end end @@ -162,16 +166,16 @@ end end end # filters on the false positive regex and sorts by prot probability - def filter_and_sort(uniq_prots, prefix=nil) - prefix_re = prefix_to_regex(prefix) + def filter_and_sort(uniq_prots, flag=nil, prefix=false) + false_flag_re = flag_to_regex(flag, prefix) sorted = uniq_prots.sort_by {|prt| [prt._probability, prt.parent._probability]}.reverse ## filter on prefix if prefix - sorted = sorted.reject {|prot| prot._protein_name =~ prefix_re } + sorted = sorted.reject {|prot| prot._protein_name =~ false_flag_re } end sorted end # assumes that these are sorted on probability @@ -290,11 +294,11 @@ group.protein.each do |prt| prots << prt end end uniq_prots = prots.hash_by(:_protein_name).map{|name,prot_arr| prot_arr.first } - filtered_sorted_prots = filter_and_sort(uniq_prots, opt.f) + filtered_sorted_prots = filter_and_sort(uniq_prots, opt.f, opt.prefix) ## num proteins above cutoff (if opt.c) num_prots_html = '' if opt.c || opt.cut_at (num_prots, actual_fppr) = num_prots_above_fppr(filtered_sorted_prots, actual_percent_fp) @@ -320,21 +324,20 @@ end # takes spec_id object # the outfn is the output filename # opt is an OpenStruct that holds opt.f = the false prefix - def bioworks_output(spec_id, outfn, file=nil, false_prefix=nil, fppr_output_as_html=nil) + def bioworks_output(spec_id, outfn, file=nil, false_flag_re=nil, fppr_output_as_html=nil) fppr_output_as_html ||= '' header_anchors = [at('#', 'number'), at('prob','protein probability (for Bioworks, lower is better)'), at('ref', 'gi number if available (or complete reference)'), at('annotation', 'annotation from the fasta file'), at('%cov', 'percent of protein sequence covered by corresponding peptides'), at('peps', 'unique peptides identified (at any confidence) Click number to show/hide.'), at('#peps', 'total number of peptides seen (not unique)')] num_cols = header_anchors.size theaders = ths(header_anchors) proteins = spec_id.prots protein_num = 0 rows = "" - prefix_re = prefix_to_regex(false_prefix) proteins.each do |prot| - if false_prefix && prot.reference =~ prefix_re + if false_flag_re && prot.reference =~ false_flag_re next end uniq_peps = Hash.new {|h,k| h[k] = true; } protein_num += 1 prot.peps.each do |pep| @@ -391,11 +394,12 @@ opts = OptionParser.new do |op| op.banner = "usage: #{File.basename(__FILE__)} [options] <file>.xml ..." op.separator " where file = bioworks -or- <run>-prot (prophet output)" op.separator " outputs: <file>.summary.html" op.separator "" - op.on("-f", "--false <prefix>", "ignore proteins with prefix (def: #{DEF_PREFIX})") {|v| opt.f = v } + op.on("-f", "--false <prefix>", "ignore proteins with flag (def: #{DEF_PREFIX})") {|v| opt.f = v } + op.on("--prefix", "false flag for prefixes only") {|v| opt.prefix = v } op.on("-p", "--precision", "include the output from precision.rb") {|v| opt.p = v } op.separator(" if --precision then -f is used to specify a file or prefix") op.separator(" that indicates the false positives.") op.on("--peptide_count <filename>", "outputs text file with # peptides per protein") {|v| opt.peptide_count = v} op.separator "" @@ -432,20 +436,23 @@ when "protproph" #spec_id = SpecID.new(file) proph_output(file, outfn, opt, fppr_output_as_html) when "bioworks" spec_id = SpecID.new(file) - bioworks_output(spec_id, outfn, file, opt.f, fppr_output_as_html) + + false_regex = flag_to_regex(opt.f, opt.prefix) + bioworks_output(spec_id, outfn, file, false_regex, fppr_output_as_html) else abort "filetype for #{file} not recognized!" end end end # method create_from_command_line def create_precision_argv(file, opt) # include only those options specific new_argv = [file] + if opt.prefix ; new_argv << '--prefix' end if opt.f ; new_argv << '-f' << opt.f end if opt.o ; new_argv << '-o' << opt.o end new_argv end