#!/usr/bin/ruby -w require 'spec_id' require 'optparse' require 'ostruct' DELIMITER = "\t" $opt = OpenStruct.new $opt.deltacn = 0.2 $opt.charge1 = 1.5 $opt.charge2 = 2.0 $opt.charge3 = 2.5 opts = OptionParser.new do |op| op.banner = "usage: #{File.basename(__FILE__)} [options] prefixlist bioworks.xml ..." op.on("-1", "--charge1 ", "xcorr <= cutoff for charge (#{$opt.charge1})") { |v| $opt.charge1 = v.to_f } op.on("-2", "--charge2 ", "xcorr <= cutoff for charge (#{$opt.charge2})") { |v| $opt.charge2 = v.to_f } op.on("-3", "--charge3 ", "xcorr <= cutoff for charge (#{$opt.charge3})") { |v| $opt.charge3 = v.to_f } op.on("-d", "--deltacn ", "deltacn >= cutoff (#{$opt.deltacn})") { |v| $opt.deltacn = v.to_f } end opts.parse! if ARGV.size < 2 puts opts exit end prefix_list = ARGV.shift prefixes = prefix_list.split "," files = ARGV.to_a ## Fill in the prefix array with the last prefix given last_prefix = prefixes.first if files.size > prefixes.size files.each_with_index do |file,i| if prefixes[i] last_prefix = prefixes[i] else prefixes[i] = last_prefix end end end ############################### #CH1 = 1.0 #CH2 = 2.0 #CH3 = 3.0 #DELTACN = 0.2 ############################### def passes(pep) if pep.deltacn <= $opt.deltacn case pep.charge when 1 pep.xcorr >= $opt.charge1 when 2 pep.xcorr >= $opt.charge2 when 3 pep.xcorr >= $opt.charge3 end else false end end # adds two categories with results from the hash def analyze(pep_groups, category, hash) best = best_xcorr(pep_groups) top10 = top10_xcorr(pep_groups) hash[category+"Best"] = filter(best).size hash[category+"Top10"] = filter(top10).size end # returns a hash containing the number of peptides passing the thresholds def number_passing(peps) np = {} np["PepProts"] = filter(peps).size by_scan_charge = peps.hash_by(:first_scan, :last_scan, :charge).values analyze(by_scan_charge, "ScanCharge", np) by_scan = peps.hash_by(:first_scan, :last_scan).values analyze(by_scan, "Scan", np) by_seq_charge = peps.hash_by(:sequence, :charge).values analyze(by_seq_charge, "SeqCharge", np) np end # key = :symbol, val = [:lt|:gt|:lte|:gte, val] def filter(peps) peps.select {|pep| passes(pep)} end def top10_xcorr(pep_groups) peptides_by_tens = [] pep_groups.each do |group| arr = group.sort {|a,b| b.xcorr <=> a.xcorr }.slice(0,10) peptides_by_tens.push(*arr) end peptides_by_tens end def best_xcorr(pep_groups) min_peptides = pep_groups.collect do |group| group.max {|a,b| a.xcorr <=> b.xcorr } end end headers = %w(PepProts ScanChargeBest ScanChargeTop10 ScanBest ScanTop10 SeqChargeBest SeqChargeTop10) csv_headers = headers.dup csv_headers.unshift "FILENAME" puts csv_headers.join(DELIMITER) files.each_with_index do |file,i| obj = SpecID.new(file) obj.peps.each do |pep| pep.charge = pep.charge.to_i pep.xcorr = pep.xcorr.to_f pep.deltacn = pep.deltacn.to_f end re_prefix = /^#{Regexp.escape(prefixes[i])}/ prc = proc {|it| it.prot.reference =~ re_prefix } #(match, nomatch) = obj.classify(:peps, prc) (fp, tp) = obj.classify(:peps, prc) (fp_pass, tp_pass) = [fp,tp].map {|v| number_passing(v) } # print to file out tp = headers.map do |head| tp_pass[head] end fp = headers.map do |head| fp_pass[head] end diffs = [] tp.each_index do |i| diffs << (tp[i] - fp[i]) end tp.unshift("TP: " + file) fp.unshift("FP: " + file) diffs.unshift("DIFF: " + file) puts tp.join(DELIMITER) puts fp.join(DELIMITER) puts diffs.join(DELIMITER) end