# # test/unit/bio/db/test_gff.rb - Unit test for Bio::GFF # # Copyright:: Copyright (C) 2005, 2008 # Mitsuteru Nakao # Naohisa Goto # License:: The Ruby License # # $Id:$ # require 'pathname' libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s $:.unshift(libpath) unless $:.include?(libpath) require 'test/unit' require 'digest/sha1' require 'bio/db/gff' module Bio class TestGFF < Test::Unit::TestCase def setup data = <'"Chromosome I Centromere"', "Gene"=>'"CEN1"'} assert_equal(at, @obj.attributes) end def test_comment assert_equal(nil, @obj.comment) end end # class TestGFFRecord class TestGFFRecordConstruct < Test::Unit::TestCase def setup @obj = Bio::GFF.new end def test_add_seqname name = "test" record = Bio::GFF::Record.new("") record.seqname = name @obj.records << record assert_equal(name, @obj.records[0].seqname) end end # class TestGFFRecordConstruct class TestGFF2 < Test::Unit::TestCase def setup data = < Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']), 'E_value' => '0.0003', 'Align' => Bio::GFF::GFF2::Record::Value.new(['101', '11']), 'Comment' => Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]), 'Note' => '', 'Misc' => 'IdString' } assert_equal(hash, @obj.attributes_to_hash) end def test_attributes attributes = [ [ 'Target', Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']) ], [ 'E_value', '0.0003' ], [ 'Align', Bio::GFF::GFF2::Record::Value.new(['101', '11']) ], [ 'Align', Bio::GFF::GFF2::Record::Value.new(['179', '36']) ], [ 'Comment', Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]) ], [ 'Note', '' ], [ 'Misc', 'IdString' ], [ 'Misc', 'free text' ], [ 'Misc', '5678' ] ] assert_equal(attributes, @obj.attributes) end def test_attribute val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']) assert_equal(val_Target, @obj.attribute('Target')) assert_equal('0.0003', @obj.attribute('E_value')) val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11']) val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36']) assert_equal(val_Align0, @obj.attribute('Align')) val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]) assert_equal(val_Comment, @obj.attribute('Comment')) assert_equal('', @obj.attribute('Note')) assert_equal('IdString', @obj.attribute('Misc')) end def test_attribute_nonexistent assert_equal(nil, @obj.attribute('NonExistent')) end def test_get_attribute val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']) assert_equal(val_Target, @obj.get_attribute('Target')) assert_equal('0.0003', @obj.get_attribute('E_value')) val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11']) val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36']) assert_equal(val_Align0, @obj.get_attribute('Align')) val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]) assert_equal(val_Comment, @obj.get_attribute('Comment')) assert_equal('', @obj.get_attribute('Note')) assert_equal('IdString', @obj.get_attribute('Misc')) end def test_get_attribute_nonexistent assert_equal(nil, @obj.get_attribute('NonExistent')) end def test_get_attributes val_Target = Bio::GFF::GFF2::Record::Value.new(['HBA_HUMAN', '11', '55']) assert_equal([ val_Target ], @obj.get_attributes('Target')) assert_equal([ '0.0003' ], @obj.get_attributes('E_value')) val_Align0 = Bio::GFF::GFF2::Record::Value.new(['101', '11']) val_Align1 = Bio::GFF::GFF2::Record::Value.new(['179', '36']) assert_equal([ val_Align0, val_Align1 ], @obj.get_attributes('Align')) val_Comment = Bio::GFF::GFF2::Record::Value.new(["Please ignore this \"Comment\" attribute; Escape \x1a\037 and \\\t\r\n\f\b\a\e\v; This is test.", "123", "4.56e-34", "Test for freetext"]) assert_equal([ val_Comment ], @obj.get_attributes('Comment')) assert_equal([ '' ], @obj.get_attributes('Note')) assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) end def test_get_attributes_nonexistent assert_equal([], @obj.get_attributes('NonExistent')) end def test_set_attribute assert_equal('0.0003', @obj.attribute('E_value')) assert_equal('1e-10', @obj.set_attribute('E_value', '1e-10')) assert_equal('1e-10', @obj.attribute('E_value')) end def test_set_attribute_multiple assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal('Replaced', @obj.set_attribute('Misc', 'Replaced')) assert_equal([ 'Replaced', 'free text', '5678' ], @obj.get_attributes('Misc')) end def test_set_attribute_nonexistent assert_equal(nil, @obj.attribute('NonExistent')) assert_equal('test', @obj.set_attribute('NonExistent', 'test')) assert_equal('test', @obj.attribute('NonExistent')) end def test_replace_attributes assert_equal([ '0.0003' ], @obj.get_attributes('E_value')) assert_equal(@obj, @obj.replace_attributes('E_value', '1e-10')) assert_equal([ '1e-10' ], @obj.get_attributes('E_value')) end def test_replace_attributes_single_multiple assert_equal([ '0.0003' ], @obj.get_attributes('E_value')) assert_equal(@obj, @obj.replace_attributes('E_value', '1e-10', '3.14', '2.718')) assert_equal([ '1e-10', '3.14', '2.718' ], @obj.get_attributes('E_value')) end def test_replace_attributes_multiple_single assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal(@obj, @obj.replace_attributes('Misc', 'Replaced_All')) assert_equal([ 'Replaced_All' ], @obj.get_attributes('Misc')) end def test_replace_attributes_multiple_multiple_two assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal(@obj, @obj.replace_attributes('Misc', 'Replaced', 'test2')) assert_equal([ 'Replaced', 'test2' ], @obj.get_attributes('Misc')) end def test_replace_attributes_multiple_multiple_same assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal(@obj, @obj.replace_attributes('Misc', 'Replaced', 'test2', 'test3')) assert_equal([ 'Replaced', 'test2', 'test3' ], @obj.get_attributes('Misc')) end def test_replace_attributes_multiple_multiple_over assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal(@obj, @obj.replace_attributes('Misc', 'Replaced', 'test2', 'test3', '4')) assert_equal([ 'Replaced', 'test2', 'test3', '4' ], @obj.get_attributes('Misc')) end def test_replace_attributes_nonexistent assert_equal(nil, @obj.attribute('NonExistent')) assert_equal(@obj, @obj.replace_attributes('NonExistent', 'test')) assert_equal([ 'test' ], @obj.get_attributes('NonExistent')) end def test_replace_attributes_nonexistent_multiple assert_equal(nil, @obj.attribute('NonExistent')) assert_equal(@obj, @obj.replace_attributes('NonExistent', 'test', 'gff2', 'attr')) assert_equal([ 'test', 'gff2', 'attr' ], @obj.get_attributes('NonExistent')) end def test_delete_attribute assert_equal('0.0003', @obj.attribute('E_value')) assert_equal('0.0003', @obj.delete_attribute('E_value', '0.0003')) assert_equal(nil, @obj.attribute('E_value')) end def test_delete_attribute_nil assert_equal('0.0003', @obj.attribute('E_value')) assert_equal(nil, @obj.delete_attribute('E_value', '3')) assert_equal('0.0003', @obj.attribute('E_value')) end def test_delete_attribute_multiple assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal('free text', @obj.delete_attribute('Misc', 'free text')) assert_equal([ 'IdString', '5678' ], @obj.get_attributes('Misc')) end def test_delete_attribute_multiple2 assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal('IdString', @obj.delete_attribute('Misc', 'IdString')) assert_equal([ 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal('5678', @obj.delete_attribute('Misc', '5678')) assert_equal([ 'free text' ], @obj.get_attributes('Misc')) end def test_delete_attribute_multiple_nil assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal(nil, @obj.delete_attribute('Misc', 'test')) assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) end def test_delete_attribute_nonexistent assert_equal(nil, @obj.attribute('NonExistent')) assert_equal(nil, @obj.delete_attribute('NonExistent', 'test')) assert_equal([], @obj.get_attributes('NonExistent')) end def test_delete_attributes assert_equal('0.0003', @obj.attribute('E_value')) assert_equal(@obj, @obj.delete_attributes('E_value')) assert_equal(nil, @obj.attribute('E_value')) end def test_delete_attributes_multiple assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) assert_equal(@obj, @obj.delete_attributes('Misc')) assert_equal([], @obj.get_attributes('Misc')) end def test_delete_attributes_nonexistent assert_equal(nil, @obj.attribute('NonExistent')) assert_equal(nil, @obj.delete_attributes('NonExistent')) assert_equal([], @obj.get_attributes('NonExistent')) end def test_sort_attributes_by_tag! tags = %w( Comment Align E_value Note ) assert_equal(@obj, @obj.sort_attributes_by_tag!(tags)) assert_equal(%w( Comment Align Align E_value Note Target Misc Misc Misc ), @obj.attributes.collect { |x| x[0] }) # check if the order of 'Misc' is not changed assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) end def test_sort_attributes_by_tag_bang_test2 tags = %w( E_value Misc Note Target ) assert_equal(@obj, @obj.sort_attributes_by_tag!(tags)) assert_equal(%w( E_value Misc Misc Misc Note Target Align Align Comment ), @obj.attributes.collect { |x| x[0] }) # check if the order of 'Misc' is not changed assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) end def test_sort_attributes_by_tag_bang_with_block assert_equal(@obj, @obj.sort_attributes_by_tag! { |x, y| x <=> y }) assert_equal(%w( Align Align Comment E_value Misc Misc Misc Note Target ), @obj.attributes.collect { |x| x[0] }) # check if the order of 'Misc' is not changed assert_equal([ 'IdString', 'free text', '5678' ], @obj.get_attributes('Misc')) end end #class TestGFF2Record class TestGFF2RecordEmpty < Test::Unit::TestCase def setup @obj = Bio::GFF::GFF2::Record.new('# test comment') end def test_comment_only? assert_equal(true, @obj.comment_only?) end def test_comment_only_false @obj.seqname = 'test' assert_equal(false, @obj.comment_only?) end def test_to_s assert_equal("# test comment\n", @obj.to_s) end def test_to_s_not_empty @obj.seqname = 'test' @obj.feature = 'region' @obj.start = 1 @obj.end = 100 assert_equal("test\t.\tregion\t1\t100\t.\t.\t.\t\t# test comment\n", @obj.to_s) @obj.add_attribute('Gene', 'unknown') assert_equal("test\t.\tregion\t1\t100\t.\t.\t.\tGene unknown\t# test comment\n", @obj.to_s) end def test_comment assert_equal(' test comment', @obj.comment) end def test_comment_eq assert_equal('changed the comment', @obj.comment = 'changed the comment') end end #class TestGFF2RecordEmpty class TestGFF2ComplexAttributes < Test::Unit::TestCase # The test string comes from the Popular genome annotation from the JGI. # ftp://ftp.jgi-psf.org/pub/JGI_data/Poplar/annotation/v1.1/Poptr1_1.JamboreeModels.gff.gz # Thanks to Tomoaki NISHIYAMA who picks up the example line. def test_attributes_case1 str = "LG_I\tJGI\tCDS\t11052\t11064\t.\t-\t0\tname \"grail3.0116000101\"; proteinId 639579; exonNumber 3\n" attributes = [ [ "name", "grail3.0116000101" ], [ "proteinId", "639579" ], [ "exonNumber", "3" ] ] record = Bio::GFF::GFF2::Record.new(str) assert_equal(attributes, record.attributes) end # The test string is modified from that of test_attributes_case1. def test_attributes_case2 str = "LG_I\tJGI\tCDS\t11052\t11064\t.\t-\t0\tname \"grail3.0116000101\"; proteinId 639579; exonNumber 3; Note \"Semicolons ; and \;, and quote \\\" can be OK\"; Comment \"This is the \\\"comment\\\"\"\n" attributes = [ [ "name", "grail3.0116000101" ], [ "proteinId", "639579" ], [ "exonNumber", "3" ], [ "Note", "Semicolons ; and ;, and quote \" can be OK" ], [ "Comment", "This is the \"comment\"" ] ] record = Bio::GFF::GFF2::Record.new(str) assert_equal(attributes, record.attributes) end def test_attributes_incompatible_backslash_semicolon # No special treatments for backslash-semicolon outside the free text. str =<test01 ACGAAGATTTGTATGACTGATTTATCCTGGACAGGCATTGGTCAGATGTCTCCTTCCGTATCGTCGTTTA GTTGCAAATCCGAGTGTTCGGGGGTATTGCTATTTGCCACCTAGAAGCGCAACATGCCCAGCTTCACACA CCATAGCGAACACGCCGCCCCGGTGGCGACTATCGGTCGAAGTTAAGACAATTCATGGGCGAAACGAGAT AATGGGTACTGCACCCCTCGTCCTGTAGAGACGTCACAGCCAACGTGCCTTCTTATCTTGATACATTAGT GCCCAAGAATGCGATCCCAGAAGTCTTGGTTCTAAAGTCGTCGGAAAGATTTGAGGAACTGCCATACAGC CCGTGGGTGAAACTGTCGACATCCATTGTGCGAATAGGCCTGCTAGTGAC END_OF_DATA @gff3 = Bio::GFF::GFF3.new(@data) end def test_const_version assert_equal(3, Bio::GFF::GFF3::VERSION) end def test_sequence_regions region = Bio::GFF::GFF3::SequenceRegion.new('test01', 1, 400) assert_equal([ region ], @gff3.sequence_regions) end def test_gff_version assert_equal('3', @gff3.gff_version) end def test_records assert_equal(7, @gff3.records.size) r_test01 = Bio::GFF::GFF3::Record.new('test01', 'RANDOM', 'contig', 1, 400, nil, '+', nil, [ ['ID', 'test01'], ['Note', 'this is test'] ]) r_mrna01 = Bio::GFF::GFF3::Record.new('test01', nil, 'mRNA', 101, 230, nil, '+', nil, [ ['ID', 'mrna01'], ['Name', 'testmRNA'], ['Note', 'this is test mRNA'] ]) r_exon01 = Bio::GFF::GFF3::Record.new('test01', nil, 'exon', 101, 160, nil, '+', nil, [ ['ID', 'exon01'], ['Name', 'exon01'], ['Alias', 'exon 1'], ['Parent', 'mrna01'], ['Parent', 'mrna01a'] ]) target = Bio::GFF::GFF3::Record::Target.new('EST101', 1, 21) gap = Bio::GFF::GFF3::Record::Gap.new('M8 D3 M6 I1 M6') r_match01 =Bio::GFF::GFF3::Record.new('test01', nil, 'Match', 101, 123, nil, nil, nil, [ ['ID', 'match01'], ['Name', 'match01'], ['Target', target], ['Gap', gap] ]) assert_equal(r_test01, @gff3.records[0]) assert_equal(r_mrna01, @gff3.records[1]) assert_equal(r_exon01, @gff3.records[3]) assert_equal(r_match01, @gff3.records[6]) end def test_sequences assert_equal(1, @gff3.sequences.size) assert_equal('test01', @gff3.sequences[0].entry_id) assert_equal('3510a3c4f66f9c2ab8d4d97446490aced7ed1fa4', Digest::SHA1.hexdigest(@gff3.sequences[0].seq.to_s)) end def test_to_s assert_equal(@data, @gff3.to_s) end end #class TestGFF3 class TestGFF3Record < Test::Unit::TestCase def setup data =<B%09C=100%25;d=e,f,g h', @obj.instance_eval { escape(str) }) end def test_escape_attribute str = @str assert_equal('A>B%09C%3D100%25%3Bd%3De%2Cf%2Cg h', @obj.instance_eval { escape_attribute(str) }) end def test_escape_seqid str = @str assert_equal('A%3EB%09C%3D100%25%3Bd%3De%2Cf%2Cg%20h', @obj.instance_eval { escape_seqid(str) }) end def test_unescape escaped_str = 'A%3EB%09C%3D100%25%3Bd%3De%2Cf%2Cg%20h' assert_equal(@str, @obj.instance_eval { unescape(escaped_str) }) end end #class TestGFF3RecordEscape class TestGFF3RecordTarget < Test::Unit::TestCase def setup @target = [ Bio::GFF::GFF3::Record::Target.new('ABCD1234', 123, 456, '+'), Bio::GFF::GFF3::Record::Target.new(">X Y=Z;P%,Q\tR", 78, 90), Bio::GFF::GFF3::Record::Target.new(nil, nil, nil), ] end def test_parse strings = [ 'ABCD1234 123 456 +', '%3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90', '' ] @target.each do |target| str = strings.shift assert_equal(target, Bio::GFF::GFF3::Record::Target.parse(str)) end end def test_target_id assert_equal('ABCD1234', @target[0].target_id) assert_equal(">X Y=Z;P%,Q\tR", @target[1].target_id) assert_equal(nil, @target[2].target_id) end def test_start assert_equal(123, @target[0].start) assert_equal(78, @target[1].start) assert_nil(@target[2].start) end def test_end assert_equal(456, @target[0].end) assert_equal(90, @target[1].end) assert_nil(@target[2].end) end def test_strand assert_equal('+', @target[0].strand) assert_nil(@target[1].strand) assert_nil(@target[2].strand) end def test_to_s assert_equal('ABCD1234 123 456 +', @target[0].to_s) assert_equal('%3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90', @target[1].to_s) assert_equal('. . .', @target[2].to_s) end end #class TestGFF3RecordTarget class TestGFF3RecordGap < Test::Unit::TestCase def setup # examples taken from http://song.sourceforge.net/gff3.shtml @gaps_src = [ 'M8 D3 M6 I1 M6', 'M3 I1 M2 F1 M4', 'M3 I1 M2 R1 M4' ] @gaps = @gaps_src.collect { |x| Bio::GFF::GFF3::Record::Gap.new(x) } end def test_to_s @gaps_src.each do |src| assert_equal(src, @gaps.shift.to_s) end end def test_eqeq gap = Bio::GFF::GFF3::Record::Gap.new('M8 D3 M6 I1 M6') assert(gap == @gaps[0]) assert_equal(false, gap == @gaps[1]) end def test_process_sequences_na ref = 'CAAGACCTAAACTGGATTCCAAT' tgt = 'CAAGACCTCTGGATATCCAAT' ref_aligned = 'CAAGACCTAAACTGGAT-TCCAAT' tgt_aligned = 'CAAGACCT---CTGGATATCCAAT' assert_equal([ ref_aligned, tgt_aligned ], @gaps[0].process_sequences_na(ref, tgt)) end def test_process_sequences_na_tooshort ref = 'CAAGACCTAAACTGGATTCCAA' tgt = 'CAAGACCTCTGGATATCCAA' assert_raise(RuntimeError) { @gaps[0].process_sequences_na(ref, tgt) } ref = 'c' tgt = 'c' assert_raise(RuntimeError) { @gaps[0].process_sequences_na(ref, tgt) } end def test_process_sequences_na_aa ref1 = 'atgaaggaggttattgaatgtcggcggt' tgt1 = 'MKEVVINVGG' ref1_aligned = 'atgaaggag---gttattgaatgtcggcggt' tgt1_aligned = 'M K E V V I >N V G G ' assert_equal([ ref1_aligned, tgt1_aligned ], @gaps[1].process_sequences_na_aa(ref1, tgt1)) end def test_process_sequences_na_aa_reverse_frameshift ref2 = 'atgaaggaggttataatgtcggcggt' tgt2 = 'MKEVVINVGG' ref2_aligned = 'atgaaggag---gttatX Y=Z;P%,Q\tR", 78, 90), Bio::GFF::GFF3::SequenceRegion.new(nil, nil, nil), ] end def test_parse strings = [ '##sequence-region ABCD1234 123 456', '##sequence-region %3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90', '##sequence-region' ] @data.each do |reg| str = strings.shift assert_equal(reg, Bio::GFF::GFF3::SequenceRegion.parse(str)) end end def test_seqid assert_equal('ABCD1234', @data[0].seqid) assert_equal(">X Y=Z;P%,Q\tR", @data[1].seqid) assert_equal(nil, @data[2].seqid) end def test_start assert_equal(123, @data[0].start) assert_equal(78, @data[1].start) assert_nil(@data[2].start) end def test_end assert_equal(456, @data[0].end) assert_equal(90, @data[1].end) assert_nil(@data[2].end) end def test_to_s assert_equal("##sequence-region ABCD1234 123 456\n", @data[0].to_s) assert_equal("##sequence-region %3EX%20Y%3DZ%3BP%25%2CQ%09R 78 90\n", @data[1].to_s) assert_equal("##sequence-region . . .\n", @data[2].to_s) end end #class TestGFF3SequenceRegion class TestGFF3MetaData < Test::Unit::TestCase def setup @data = Bio::GFF::GFF3::MetaData.new('feature-ontology', 'http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12') end def test_parse assert_equal(@data, Bio::GFF::GFF3::MetaData.parse('##feature-ontology http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12')) end def test_directive assert_equal('feature-ontology', @data.directive) end def test_data assert_equal('http://song.cvs.sourceforge.net/*checkout*/song/ontology/sofa.obo?revision=1.12', @data.data) end end #class TestGFF3MetaData end #module Bio