require 'test/unit'
require 'spec_id'

# Tests for SpecID: loading result files, classifying hits by a reference
# prefix, precision bookkeeping, file-type detection and peptide sequence
# clean-up. Fixture files are looked up relative to this file.
class SpecIDTest < Test::Unit::TestCase

  # Stores the fixture paths used by the tests as instance variables.
  def initialize(arg)
    super(arg)
    here = File.dirname(__FILE__)
    @tfiles = here + '/tfiles/'
    @tfiles_l = here + '/tfiles_large/'
    @bw = "#{@tfiles}bioworks_small.xml"
    @old_prot_proph = "#{@tfiles}yeast_gly_small-prot.xml"
    @prot_proph = "#{@tfiles}opd1/000_020_3prots-prot.xml"
    @srf = "#{@tfiles_l}7MIX_STD_110802_1.srf"
  end

  # The small bioworks fixture is expected to yield 106 proteins.
  def test_spec_id_creation
    assert_equal(106, SpecID.new(@bw).prots.size)
  end

  # Splits the 19 proteins of the INV fixture using the "INV_" prefix and
  # checks the sizes of the two resulting groups.
  def test_classify_by_false_flag
    sp = SpecID.new("#{@tfiles}bioworks_with_INV_small.xml")
    assert_equal(19, sp.prots.size)

    true_pos, false_pos = sp.classify_by_false_flag(:prots, "INV_", true, true)
    assert_equal(4, false_pos.size, "num false pos")
    assert_equal(15, true_pos.size, "num true pos")
  end

  # Compares SpecID#rank_and_classify against a hand-built ranked list fed
  # through ROC, then pins the values returned by num_hits_and_ppv_for_prob.
  def test_precision
    require 'roc'
    file = "#{@tfiles}bioworks_with_INV_small.xml"

    # 4 INV and 15 non-INV entries, 19 proteins in total ('t' = true hit).
    flags = %w(t t t t t t t t t t F t t t t F t F F)
    answ = flags.each_with_index.map do |flag, i|
      rank = i + 1
      # In bioworks_with_INV_small.xml proteins 8 and 9 have the same
      # probability as protein 7, so all three are given rank 7.
      rank = 7 if rank == 8 || rank == 9
      [rank, flag == 't']
    end

    roc = ROC.new
    tp, fp = ROC.new.prep_list(answ)
    expected_tps, expected_ppv = roc.tps_and_ppv(tp, fp)

    sp = SpecID.new(file)
    assert_equal(19, sp.prots.size)

    by_probability = proc { |prt| prt.probability }
    lacks_inv_prefix = proc { |prt| !(prt.reference =~ /^INV_/) }
    tp, fp = sp.rank_and_classify(:prots, by_probability, lacks_inv_prefix)

    tps, ys = roc.tps_and_ppv(tp, fp)
    assert_equal(expected_tps, tps)
    assert_equal(expected_ppv, ys)

    _num_hits, prec = sp.num_hits_and_ppv_for_prob("INV_", true)

    # @TODO: these values are asserted for consistency's sake only.
    assert_in_delta_arrays(
      [1, 2, 3, 4, 5, 6, 9, 10, 11, 12, 13, 14, 15],
      tps,
      0.0000001
    )
    # Consistency check only:
    assert_in_delta_arrays(
      [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.909090909090909,
       0.916666666666667, 0.923076923076923, 0.928571428571429,
       0.866666666666667],
      prec,
      0.0000001
    )
  end

  # Asserts element-wise that every value of +one+ is within +delta+ of the
  # value at the same index in +two+. Only the indices of +one+ are visited.
  def assert_in_delta_arrays(one, two, delta, message=nil)
    one.each_index do |i|
      assert_in_delta(one[i], two[i], delta, message)
    end
  end

  # Checks the type string SpecID.file_type reports for each kind of input.
  def test_file_type
    assert_equal('bioworks', SpecID.file_type(@bw))
    assert_equal('protproph', SpecID.file_type(@prot_proph))
    assert_equal('srg', SpecID.file_type('whatever.srg'))
    ## WOULD BE NICE TO GET THIS WORKING, TOO
    # assert_equal('protproph', SpecID.file_type(@old_prot_proph))

    # The .srf fixture lives in the large-files directory; when that
    # directory is absent a notice is printed instead of running the check.
    unless File.exist?(@tfiles_l)
      assert_nil(puts("\n--SKIPPING TEST-- (missing dir: #{@tfiles_l})"))
      return
    end

    assert File.exist?(@srf), "file #{@srf} is there"
    assert_equal('srf', SpecID.file_type(@srf))
  end

  # Maps raw sequences to the output expected from
  # SpecID::Pep.remove_non_amino_acids and checks each pair.
  def test_non_standard_aa_removal
    expectations = {
      "K.PEPTIDE.Z" => "K.PEPTIDE.Z",
      "K.*M" => "K.M",
      "aI" => 'I',
      "YI.&" => "YI.",
      "EI.!@#\$%^&*(){}[]|\\;:'\"<>,?/EI" => 'EI.EI'
    }
    expectations.each_pair do |raw, cleaned|
      assert_equal(cleaned, SpecID::Pep.remove_non_amino_acids(raw))
    end
  end
end

# Minimal protein stand-in that only mixes in SpecID::Prot.
class MyProt
  include SpecID::Prot
end

# Minimal peptide stand-in: SpecID::Pep plus an xcorr attribute.
class MyPep
  include SpecID::Pep
  attr_accessor :xcorr
end

# Tests for SpecID.occams_razor using the stand-in classes above.
class TestOccamsRazor < Test::Unit::TestCase

  # Builds 7 proteins and 13 peptides, assigns peptides to the first two
  # proteins and checks the first entry returned by SpecID.occams_razor.
  def test_small
    prots = (0..6).map do |n|
      prot = MyProt.new
      prot.reference = "ref_#{n}"
      prot
    end
    peps = Array.new(13) { MyPep.new }

    # Peptide index i gets aaseqs[i] and xcorrs[i] (indices 0 through 12).
    aaseqs = %w(AAA BBB CCC ABC AAA BBB CCC ABC DDD EEE FFF EEEEE DDD)
    xcorrs = [1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0, 0.5, 0.6, 0.7, 0.8, 0.5]
    peps.each_with_index do |pep, i|
      pep.aaseq = aaseqs[i]
      pep.xcorr = xcorrs[i]
    end

    prots[0].peps = peps[0,4]
    prots[1].peps = [peps[2]]  ## should be missing
    test_prots = prots[0,2]

    answ = SpecID.occams_razor(test_prots)
    answ.each { |an| assert(an[0].is_a?(SpecID::Prot), "prots are there") }

    first = answ.first
    assert_equal(prots[0], first[0])
    assert_equal_array_content(prots[0].peps, first[1])

    require 'pp'
    #pp answ
    prots[0].peps = peps[0,4]
    prots[1].peps = [peps[2]]  ## should be missing
    prots[2].peps = []  ## should be missing
    # The result of this second call is not asserted on.
    SpecID.occams_razor(test_prots, true)
    #pp answ

    #prots[2].peps = [peps[2]]
    #prots[2].peps.push( peps[3] ) ## should be there since it has 2
    #prots[3].peps = [peps[3]]  ## should be missing
  end

  # Asserts that every item of +exp1+ is included in +ans+; extra items in
  # +ans+ are not checked.
  def assert_equal_array_content(exp1, ans, message='')
    exp1.each { |item| assert(ans.include?(item), "finding #{item}: #{message}") }
  end
end

require 'fasta'

# Tests for SpecID::Pep.protein_groups_by_sequence against a small
# hand-built Fasta object.
class TestProteinGroups < Test::Unit::TestCase

  # Five proteins (A-Z, its reverse, an identical copy, A-E and its
  # reverse) are searched with nine peptide sequences; the expected result
  # lists, per peptide, the proteins that should be reported for it.
  def test_small
    alphabet = ('A'..'Z').to_a
    head = ('A'..'E').to_a

    prots = [
      Fasta::Prot.new("prot1", alphabet.join('')),
      Fasta::Prot.new("prot1_reverse", alphabet.reverse.join('')),
      Fasta::Prot.new("prot1_identical", alphabet.join('')),
      Fasta::Prot.new("prot1_short", head.join('')),
      Fasta::Prot.new("prot1_reverse_short", head.reverse.join(''))
    ]
    fasta = Fasta.new(prots)

    pep_seqs = %w(ABCD DEFG ABCD DEFG EDCB FEDCB XYZ RANDOM AEABA)
    arr = SpecID::Pep.protein_groups_by_sequence(pep_seqs, fasta)

    full, reversed, identical, short, reversed_short = prots
    exp = [
      [full, identical, short],
      [full, identical],
      [full, identical, short],
      [full, identical],
      [reversed, reversed_short],
      [reversed],
      [full, identical],
      [],
      []
    ]
    assert_equal(exp, arr)
  end
end