lib/bio/db/embl/embl.rb in bio-1.0.0 vs lib/bio/db/embl/embl.rb in bio-1.1.0

- old
+ new

@@ -1,13 +1,13 @@ # # = bio/db/embl/embl.rb - EMBL database class # # -# Copyright:: Copyright (C) 2001-2005 Mitsuteru C. Nakao <n@bioruby.org> -# License:: LGPL +# Copyright:: Copyright (C) 2001-2007 Mitsuteru C. Nakao <n@bioruby.org> +# License:: The Ruby License # -# $Id: embl.rb,v 1.26 2006/01/28 06:40:38 nakao Exp $ +# $Id: embl.rb,v 1.29 2007/04/05 23:35:40 trevor Exp $ # # == Description # # Parser class for EMBL database entry. # @@ -26,28 +26,10 @@ # http://www.ebi.ac.uk/embl/ # # * The EMBL Nucleotide Sequence Database: Users Manual # http://www.ebi.ac.uk/embl/Documentation/User_manual/usrman.html # -#-- -# -# This library is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public -# License as published by the Free Software Foundation; either -# version 2 of the License, or (at your option) any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with this library; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -# -#++ -# require 'bio/db' require 'bio/db/embl/common' module Bio @@ -56,11 +38,11 @@ # returns contents in the ID line. # * Bio::EMBL#id_line -> <ID Hash> # where <ID Hash> is: # {'ENTRY_NAME' => String, 'MOLECULE_TYPE' => String, 'DIVISION' => String, - # 'SEQUENCE_LENGTH' => Int} + # 'SEQUENCE_LENGTH' => Int, 'SEQUENCE_VERSION' => Int} # # ID Line # "ID ENTRY_NAME DATA_CLASS; MOLECULE_TYPE; DIVISION; SEQUENCE_LENGTH BP." # # DATA_CLASS = ['standard'] @@ -85,18 +67,35 @@ # SYN (Synthetic) # STS (STSs) # UNC (Unclassified) # VRL (Viruses) # + # Rel 89- + # ID CD789012; SV 4; linear; genomic DNA; HTG; MAM; 500 BP. + # ID <1>; SV <2>; <3>; <4>; <5>; <6>; <7> BP. + # 1. Primary accession number + # 2. Sequence version number + # 3. Topology: 'circular' or 'linear' + # 4. Molecule type (see note 1 below) + # 5. Data class (see section 3.1) + # 6. Taxonomic division (see section 3.2) + # 7. Sequence length (see note 2 below) def id_line(key=nil) unless @data['ID'] tmp = Hash.new idline = fetch('ID').split(/; +/) - tmp['ENTRY_NAME'], tmp['DATA_CLASS'] = idline[0].split(/ +/) - tmp['MOLECULE_TYPE'] = idline[1] - tmp['DIVISION'] = idline[2] - tmp['SEQUENCE_LENGTH'] = idline[3].strip.split(' ').first.to_i + tmp['ENTRY_NAME'], tmp['DATA_CLASS'] = idline.shift.split(/ +/) + if idline.first =~ /^SV/ + tmp['SEQUENCE_VERSION'] = idline.shift.split(' ').last + tmp['TOPOLOGY'] = idline.shift + tmp['MOLECULE_TYPE'] = idline.shift + tmp['DATA_CLASS'] = idline.shift + else + tmp['MOLECULE_TYPE'] = idline.shift + end + tmp['DIVISION'] = idline.shift + tmp['SEQUENCE_LENGTH'] = idline.shift.strip.split(' ').first.to_i @data['ID'] = tmp end if key @@ -144,14 +143,18 @@ # * Bio::EMBL#version -> accession in Int # # SV Line; sequence version (1/entry) # SV Accession.Version def sv - field_fetch('SV').sub(/;/,'') + if (v = field_fetch('SV').sub(/;/,'')) == "" + [id_line['ENTRY_NAME'], id_line['SEQUENCE_VERSION']].join('.') + else + v + end end def version - sv.split(".")[1].to_i + (sv.split(".")[1] || id_line['SEQUENCE_VERSION']).to_i end # returns contents in the date (DT) line. # * Bio::EMBL#dt -> <DT Hash> @@ -392,63 +395,8 @@ end return feature end -end +end # class EMBL -end - - -if __FILE__ == $0 - while ent = $<.gets(Bio::EMBL::RS) - puts "\n ==> e = Bio::EMBL.new(ent) " - e = Bio::EMBL.new(ent) - - puts "\n ==> e.entry_id " - p e.entry_id - puts "\n ==> e.id_line " - p e.id_line - puts "\n ==> e.id_line('molecule') " - p e.id_line('molecule') - puts "\n ==> e.molecule " - p e.molecule - puts "\n ==> e.ac " - p e.ac - puts "\n ==> e.sv " - p e.sv - puts "\n ==> e.dt " - p e.dt - puts "\n ==> e.dt('created') " - p e.dt('created') - puts "\n ==> e.de " - p e.de - puts "\n ==> e.kw " - p e.kw - puts "\n ==> e.os " - p e.os - puts "\n ==> e.oc " - p e.oc - puts "\n ==> e.og " - p e.og - puts "\n ==> e.ref " - p e.ref - puts "\n ==> e.dr " - p e.dr - puts "\n ==> e.ft " - p e.ft - puts "\n ==> e.each_cds {|c| p c}" - p e.each_cds {|c| p c } - puts "\n ==> e.sq " - p e.sq - puts "\n ==> e.sq('a') " - p e.sq('a') - puts "\n ==> e.gc" - p e.gc - puts "\n ==> e.seq " - p e.seq - end - -end - - - +end # module Bio