# RSpec for BioRuby-GFF3-Plugin. Run with something like:
#
#   ruby -I ../bioruby/lib/ ~/.gems/bin/spec spec/gff3_assemble2_spec.rb
#
# Copyright (C) 2010 Pjotr Prins
#
# This spec parses the Contig125 GFF3 + FASTA fixture pair, collects every
# CDS record list yielded by the assembler, and checks that each assembled
# CDS translates to the amino-acid sequence recorded in PROTEINS.

# Make the plugin's lib directory loadable when running from the spec dir.
$LOAD_PATH << "../lib"

require 'bio-gff3'

include Bio::GFFbrowser

# Fixture files used by this spec.
FASTAFILE2="test/data/gff/MhA1_Contig125.fa"
GFF3FILE2="test/data/gff/MhA1_Contig125.gff3"

# Expected translation for each CDS id; every sequence ends with the stop
# marker '*'. The keys are the ids used to look up entries collected from
# each_CDS below.
PROTEINS = {
  "cds:MhA1_Contig125.frz3.gene2" => "MNDLVNQFKSAALAVGQYLTPVLRESKFKETGVLTPEEFVAAGDHLVHLCPTWSWAKASDSNGQTTFLITKQSALVTQRCAQIMGYDEILKEKIIKDESAETGDEQNEWVDTHHFDFETNCAPKDFEEEENKVEDIKENNLNEEENCEEEEEGEPIDLDEYLSSGLLEEEDPARFVLQNKSLKETKDDSTSNNLLRTRRYDLHITYDKYYQVPRFWLVGYDENGSPLAVDKMKEDFSQEHADKTITLESHPHISGLTLATIHPCRHAPVMKRLIEQFQESGKELLVIDYLFVFLKFVQAVIPTVEYDYTRSIHF*",
  "cds:MhA1_Contig125.frz3.gene3" => "MERRKVSNTDPFEAAEGMLRWNSDIIKDKEIKQFKGLKKPLKLSENQNDEYDVDPFEAVTDWLPLNKNVDKT*",
  "cds:MhA1_Contig125.frz3.gene4" => "MKSTKMSATEIVSYHLYSLHTLSSFCLTENPENIFIKDQNFQDFFLFCERVREQFNEAEELKTPLNTKISQTDSTNIQNKKDEPSISIGPCVNDLCPKGFECIENICFKSMEMPKTERVLSIGPCVNAKCPEGFSCYEDDRQCYAN*",
  "cds:MhA1_Contig125.frz3.gene5" => "MRLDIFLVIAFSLGVAVNCGVVKRSQNSYGDEAGAGGAAGAAPAAAAAPAGGEAAAPSGGETGGAAAPAEAAPAAAPAPEAAPAAAPAPEQAAPAVAQPAPAAAPSQASGYRKKRSQNSYGDEAGHAAGAAPAEAAAPAAGGEAAAPSGGETGGAAAPAETAPAAAPEAAPAAAPAPEQAAPAVAQPAPAAAPSQASGYRKKRSQNSYGDEAGAGGAAGAAPAAAAAPAGGEAAAPAAGGCTEGCAAGGESAAPAAAPAPEAAPAAAPAPEQAAPAVAQPAPAAAPSQASGYRKKRSQNSYGDEAGHAAGAAPAESAAPAAGGEAAAPSGGETGGAAAPAETAPAAAPEAAPAAAPEQAAPAVAQPAPAAAPSQASGY*",
  "cds:MhA1_Contig125.frz3.gene6" => "MDNAENEEKNDKENHEKPIKFEINNQKQFFKKEDEVKECEEESKNVNEFEEDQGTNEVSEVLRLFRRGNVWAFALQNLDLMRAYVILSCLAIAVVMLANFLRNSRFFDFCLK*",
  "cds:MhA1_Contig125.frz3.gene7" => "MLFCLLHPMDHNTGPLARKSSTLCSLLLLSIAALLVLAVPGQANSEEVGFGNHTKEKDGDEVTVNIDSVQAPDDLTYAVYEKRFKDVCEFVITKDDIELLYKGKGCTVELLTGENQDITFKTGVKDIGCVRNDCDKASLYSSVGEVEPGLSQSVTDGKTEFELRISGSEFNMNFEEDAPFNPQKNRCAPKQDHIVKPETWRIKNGELKDKHLLVFHLLPKTATREYTKEGKISKEQPPEEAPKCKLFIRFKRPYYEFLYVGPITTTVTTTTTTTTTPSSGLVGQGPTPKTGTHQGNTPKVQGKGSEKESDNTMMIVIIVIVVVVVVLVIGVVLIFILKNKGSKEDELQKVKQTTTKANKSSAVTL*",
  "cds:MhA1_Contig125.frz3.gene8" => "MRQRESVILNKTENQTQIFEKLLNLYNSPKDVVNLRNNPEQLIQLGIDSKQFSAILEMMFGARRRNSLRGDYREARRFRNRREYSAWWDAGEVNNWRINSRHPSRHGTVEYWRCAFAVGRFFTCPSRIRITFGFGDRYVIVANARNHPHNHNRQNNAGDNNPNTVRRALPMEANERLTARTVHIGPRPSTSAPNQPTTKGQAAPPRASVSTTSANAAATPTTSASSTVQKGTAAPSTSAAPSTSAAPSTSAASRPLKPPGFATAATSATNSQQAAAKPASNQQPAPTATTSQTSASAPGTSSKPPPTTSPAPAATPAPATSQPGTSTVKSAPASTPTPLKPPAATEKQTSQPPSAAPGTTATIKPVLVTNIPGLPPGIPTSATGSGTINVSLTALDAFLAGHPRPASTSAPASSQTTPGPASQPSSAPVTQNKGKEEKKEDKKEEKKEGKKEEKKEGKKEEKRGPG*",
  "cds:MhA1_Contig125.frz3.gene9" => "MHGATIGNRLRATRRSRDAQMMAAAESVARLSRRHSHQKAIRRVLPPPPALNSSRDSQPINPFCSDPSSIQPVIAKGVCVRSVGVFKSALPPSTPFPSTSTAPNIPSDNTFVPHLNNSTPLHNNHHRTLGGSENCLNYQQQYIGGSYSARSQQQHPPPPAPSSCCISPFKPLEILGNSNGTTDSSSGGCNSARAAMHRQFTGSSNGEEEFTVEELQEFAQAFKMFDKDGNGTMSIKELGVAMRTLGLNPTEDELLNMVNEYDVDGNGIDFSEFCKMMKEMNKETDQELIRLAFRVFDKDGNGYITAQEFRHFMTIDYEEFVNAVAPIVNDGAKEDAPFFEKEQPTSFGQPITSGPPLASGKAKHF*",
  "cds:MhA1_Contig125.frz3.gene21" => "MDVKPPPSAPQDIKEAIKESNMSTWRPFLIGNRMRTTSEDSAESFDAYDKSFDAYDVGNKKERRLSITEQFFGSSMPGRLRSNSTTEYEGHEHEPTFKKVDLKQFMKHQRKILGDDEWQ*",
  "cds:MhA1_Contig125.frz3.gene22" => "MAKALISGFVSSGFISKSNISICTRSEATAKSWRLQGFTSAYSKDVFYSEVKKPRAIILIAVKPQIFPSFINEVKANEWFYFGVPGILCISIMSGISLQHFDKEMKSVGFDGHSMRLMPNVNCAVSTGTLVLSADPETPQELVTLVSVLSSYVGKCIRVDEAHFNAASSISGCGPAFIALVIEALADGGVVAGLSRELANQLAADTVKGTGHLFMTKMASVSPTSDNPSPAQLKDQVCSPAGTTIEGVRELEKHGVRSAFIEAIQASTRRAFELSQ*"
}.freeze

describe GFFdb, "Assemble CDS (Contig125)" do
  # Parse the fixture once for the whole group and index what each_CDS
  # yields by CDS id, so every example can look up its own records.
  before :all do
    # Alternative construction with caching disabled, kept for reference:
    # gffdb = Bio::GFFbrowser::GFFdb.new(GFF3FILE2, :fasta_filename => FASTAFILE2,:cache_components => :cache_none, :cache_records => :cache_none)
    gffdb = Bio::GFFbrowser::GFFdb.new(GFF3FILE2, :fasta_filename => FASTAFILE2)
    @gff = gffdb.assembler
    @gff.parse
    @contigsequence = @gff.sequencelist["MhA1_Contig125"]
    @componentlist = {}
    @cdslist = {}
    @gff.each_CDS do |id, reclist, component|
      @componentlist[id] = component
      @cdslist[id] = reclist
    end
  end

  it "should have the single contig" do
    @gff.sequencelist.size.should == 1
    @gff.sequencelist["MhA1_Contig125"].should_not be_nil
    @gff.sequencelist["MhA1_Contig125"].size.should == 53702
  end

  # One example per expected protein: assemble the CDS records against the
  # contig sequence and compare the translation with the recorded one.
  PROTEINS.each do |name, seq|
    it "should translate gene #{name}" do
      recs = @cdslist[name]
      # Fail with a readable message (instead of NoMethodError on nil)
      # when the GFF3 did not yield a CDS with this id.
      recs.should_not be_nil
      component = @componentlist[name]
      cds0 = recs[0]
      cds0.seqname.should == 'MhA1_Contig125'
      # Debugging aid: dump the codonized nucleotide assembly.
      # ntseq = @gff.assemble(@contigsequence,component.start,recs,:codonize=>true)
      # p [name, ntseq]
      aaseq = @gff.assembleAA(@contigsequence,component.start,recs)
      aaseq.should == seq
    end
  end
end