lib/spec_id/protein_summary.rb in mspire-0.2.1 vs lib/spec_id/protein_summary.rb in mspire-0.2.2
- old
+ new
@@ -143,13 +143,17 @@
else
name
end
end
# Builds the regular expression used to recognize flagged (false positive)
# protein names.
#
# flag   - String to look for; nil or false means "no flag".
# prefix - when truthy the flag must sit at the very start of the name,
#          otherwise it may occur anywhere in it.
#
# Returns a Regexp matching the flag literally (regex metacharacters are
# escaped), or nil when no flag was given.
def flag_to_regex(flag, prefix=false)
  return nil unless flag
  escaped = Regexp.escape(flag)
  # \A rather than ^: only the start of the whole string is a prefix;
  # ^ would also match directly after an embedded newline.
  prefix ? /\A#{escaped}/ : /#{escaped}/
end
@@ -162,16 +166,16 @@
end
end
end
# Sorts proteins from highest to lowest probability and removes the ones
# whose name carries the false-positive flag.
#
# uniq_prots - collection of proteins responding to _probability,
#              _protein_name and parent (parent._probability is the
#              secondary sort key).
# flag       - String identifying false-positive names; nil keeps everything.
# prefix     - when truthy the flag only counts at the start of the name.
#
# Returns a new Array; the input collection is not modified.
def filter_and_sort(uniq_prots, flag=nil, prefix=false)
  false_flag_re = flag_to_regex(flag, prefix)
  sorted = uniq_prots.sort_by {|prt| [prt._probability, prt.parent._probability] }.reverse
  # Filter whenever a flag was supplied.  Guarding on +prefix+ (which is only
  # a boolean modifier) would silently skip filtering for non-prefix flags.
  return sorted unless false_flag_re
  sorted.reject {|prot| prot._protein_name =~ false_flag_re }
end
# assumes that these are sorted on probability
@@ -290,11 +294,11 @@
group.protein.each do |prt|
prots << prt
end
end
uniq_prots = prots.hash_by(:_protein_name).map{|name,prot_arr| prot_arr.first }
- filtered_sorted_prots = filter_and_sort(uniq_prots, opt.f)
+ filtered_sorted_prots = filter_and_sort(uniq_prots, opt.f, opt.prefix)
## num proteins above cutoff (if opt.c)
num_prots_html = ''
if opt.c || opt.cut_at
(num_prots, actual_fppr) = num_prots_above_fppr(filtered_sorted_prots, actual_percent_fp)
@@ -320,21 +324,20 @@
end
# takes spec_id object
# the outfn is the output filename
# false_flag_re is a Regexp (or nil); proteins whose reference matches it are skipped
- def bioworks_output(spec_id, outfn, file=nil, false_prefix=nil, fppr_output_as_html=nil)
+ def bioworks_output(spec_id, outfn, file=nil, false_flag_re=nil, fppr_output_as_html=nil)
fppr_output_as_html ||= ''
header_anchors = [at('#', 'number'), at('prob','protein probability (for Bioworks, lower is better)'), at('ref', 'gi number if available (or complete reference)'), at('annotation', 'annotation from the fasta file'), at('%cov', 'percent of protein sequence covered by corresponding peptides'), at('peps', 'unique peptides identified (at any confidence) Click number to show/hide.'), at('#peps', 'total number of peptides seen (not unique)')]
num_cols = header_anchors.size
theaders = ths(header_anchors)
proteins = spec_id.prots
protein_num = 0
rows = ""
- prefix_re = prefix_to_regex(false_prefix)
proteins.each do |prot|
- if false_prefix && prot.reference =~ prefix_re
+ if false_flag_re && prot.reference =~ false_flag_re
next
end
uniq_peps = Hash.new {|h,k| h[k] = true; }
protein_num += 1
prot.peps.each do |pep|
@@ -391,11 +394,12 @@
opts = OptionParser.new do |op|
op.banner = "usage: #{File.basename(__FILE__)} [options] <file>.xml ..."
op.separator " where file = bioworks -or- <run>-prot (prophet output)"
op.separator " outputs: <file>.summary.html"
op.separator ""
- op.on("-f", "--false <prefix>", "ignore proteins with prefix (def: #{DEF_PREFIX})") {|v| opt.f = v }
+ op.on("-f", "--false <prefix>", "ignore proteins with flag (def: #{DEF_PREFIX})") {|v| opt.f = v }
+ op.on("--prefix", "false flag for prefixes only") {|v| opt.prefix = v }
op.on("-p", "--precision", "include the output from precision.rb") {|v| opt.p = v }
op.separator(" if --precision then -f is used to specify a file or prefix")
op.separator(" that indicates the false positives.")
op.on("--peptide_count <filename>", "outputs text file with # peptides per protein") {|v| opt.peptide_count = v}
op.separator ""
@@ -432,20 +436,23 @@
when "protproph"
#spec_id = SpecID.new(file)
proph_output(file, outfn, opt, fppr_output_as_html)
when "bioworks"
spec_id = SpecID.new(file)
- bioworks_output(spec_id, outfn, file, opt.f, fppr_output_as_html)
+
+ false_regex = flag_to_regex(opt.f, opt.prefix)
+ bioworks_output(spec_id, outfn, file, false_regex, fppr_output_as_html)
else
abort "filetype for #{file} not recognized!"
end
end
end # method create_from_command_line
# Builds an argument list from +file+ and the subset of options that are
# forwarded (judging by the name, to the precision code; the caller is not
# visible here).
#
# file - filename; always the first element of the result.
# opt  - options object responding to prefix, f and o.
#
# Returns an Array: file, then '--prefix' if opt.prefix is set, then
# '-f' and opt.f if opt.f is set, then '-o' and opt.o if opt.o is set.
def create_precision_argv(file, opt)
  new_argv = [file]
  new_argv << '--prefix' if opt.prefix
  new_argv << '-f' << opt.f if opt.f
  new_argv << '-o' << opt.o if opt.o
  new_argv
end