lib/bio/db/embl/embl.rb in bio-1.0.0 vs lib/bio/db/embl/embl.rb in bio-1.1.0
- old
+ new
@@ -1,13 +1,13 @@
#
# = bio/db/embl/embl.rb - EMBL database class
#
#
-# Copyright:: Copyright (C) 2001-2005 Mitsuteru C. Nakao <n@bioruby.org>
-# License:: LGPL
+# Copyright:: Copyright (C) 2001-2007 Mitsuteru C. Nakao <n@bioruby.org>
+# License:: The Ruby License
#
-# $Id: embl.rb,v 1.26 2006/01/28 06:40:38 nakao Exp $
+# $Id: embl.rb,v 1.29 2007/04/05 23:35:40 trevor Exp $
#
# == Description
#
# Parser class for EMBL database entry.
#
@@ -26,28 +26,10 @@
# http://www.ebi.ac.uk/embl/
#
# * The EMBL Nucleotide Sequence Database: Users Manual
# http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html
#
-#--
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-#
-#++
-#
require 'bio/db'
require 'bio/db/embl/common'
module Bio
@@ -56,11 +38,11 @@
# returns contents in the ID line.
# * Bio::EMBL#id_line -> <ID Hash>
# where <ID Hash> is:
# {'ENTRY_NAME' => String, 'MOLECULE_TYPE' => String, 'DIVISION' => String,
- # 'SEQUENCE_LENGTH' => Int}
+ # 'SEQUENCE_LENGTH' => Int, 'SEQUENCE_VERSION' => String}
#
# ID Line
# "ID ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP."
#
# DATA_CLASS = ['standard']
@@ -85,18 +67,35 @@
# SYN (Synthetic)
# STS (STSs)
# UNC (Unclassified)
# VRL (Viruses)
#
+ # Rel 89-
+ # ID CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP.
+ # ID <1>; SV <2>; <3>; <4>; <5>; <6>; <7> BP.
+ # 1. Primary accession number
+ # 2. Sequence version number
+ # 3. Topology: 'circular' or 'linear'
+ # 4. Molecule type (see note 1 below)
+ # 5. Data class (see section 3.1)
+ # 6. Taxonomic division (see section 3.2)
+ # 7. Sequence length (see note 2 below)
def id_line(key=nil)
unless @data['ID']
tmp = Hash.new
idline = fetch('ID').split(/; +/)
- tmp['ENTRY_NAME'], tmp['DATA_CLASS'] = idline[0].split(/ +/)
- tmp['MOLECULE_TYPE'] = idline[1]
- tmp['DIVISION'] = idline[2]
- tmp['SEQUENCE_LENGTH'] = idline[3].strip.split(' ').first.to_i
+ tmp['ENTRY_NAME'], tmp['DATA_CLASS'] = idline.shift.split(/ +/)
+ if idline.first =~ /^SV/
+ tmp['SEQUENCE_VERSION'] = idline.shift.split(' ').last
+ tmp['TOPOLOGY'] = idline.shift
+ tmp['MOLECULE_TYPE'] = idline.shift
+ tmp['DATA_CLASS'] = idline.shift
+ else
+ tmp['MOLECULE_TYPE'] = idline.shift
+ end
+ tmp['DIVISION'] = idline.shift
+ tmp['SEQUENCE_LENGTH'] = idline.shift.strip.split(' ').first.to_i
@data['ID'] = tmp
end
if key
@@ -144,14 +143,18 @@
# * Bio::EMBL#version -> sequence version number in Int
#
# SV Line; sequence version (1/entry)
# SV Accession.Version
def sv
- field_fetch('SV').sub(/;/,'')
+ if (v = field_fetch('SV').sub(/;/,'')) == ""
+ [id_line['ENTRY_NAME'], id_line['SEQUENCE_VERSION']].join('.')
+ else
+ v
+ end
end
def version
- sv.split(".")[1].to_i
+ (sv.split(".")[1] || id_line['SEQUENCE_VERSION']).to_i
end
# returns contents in the date (DT) line.
# * Bio::EMBL#dt -> <DT Hash>
@@ -392,63 +395,8 @@
end
return feature
end
-end
+end # class EMBL
-end
-
-
-if __FILE__ == $0
- while ent = $<.gets(Bio::EMBL::RS)
- puts "\n ==> e = Bio::EMBL.new(ent) "
- e = Bio::EMBL.new(ent)
-
- puts "\n ==> e.entry_id "
- p e.entry_id
- puts "\n ==> e.id_line "
- p e.id_line
- puts "\n ==> e.id_line('molecule') "
- p e.id_line('molecule')
- puts "\n ==> e.molecule "
- p e.molecule
- puts "\n ==> e.ac "
- p e.ac
- puts "\n ==> e.sv "
- p e.sv
- puts "\n ==> e.dt "
- p e.dt
- puts "\n ==> e.dt('created') "
- p e.dt('created')
- puts "\n ==> e.de "
- p e.de
- puts "\n ==> e.kw "
- p e.kw
- puts "\n ==> e.os "
- p e.os
- puts "\n ==> e.oc "
- p e.oc
- puts "\n ==> e.og "
- p e.og
- puts "\n ==> e.ref "
- p e.ref
- puts "\n ==> e.dr "
- p e.dr
- puts "\n ==> e.ft "
- p e.ft
- puts "\n ==> e.each_cds {|c| p c}"
- p e.each_cds {|c| p c }
- puts "\n ==> e.sq "
- p e.sq
- puts "\n ==> e.sq('a') "
- p e.sq('a')
- puts "\n ==> e.gc"
- p e.gc
- puts "\n ==> e.seq "
- p e.seq
- end
-
-end
-
-
-
+end # module Bio