lib/bio/db/kegg/genes.rb in bio-1.3.1 vs lib/bio/db/kegg/genes.rb in bio-1.4.0
- old
+ new
@@ -3,11 +3,11 @@
#
# Copyright:: Copyright (C) 2001, 2002, 2006
# Toshiaki Katayama <k@bioruby.org>
# License:: The Ruby License
#
-# $Id: genes.rb,v 0.26 2007/12/14 16:20:38 k Exp $
+# $Id:$
#
#
# == KEGG GENES parser
#
# See http://www.genome.jp/kegg/genes.html
@@ -36,11 +36,11 @@
# p entry.definition # => String
# p entry.eclinks # => Array
#
# # PATHWAY
# p entry.pathway # => String
-# p entry.pathways # => Array
+# p entry.pathways # => Hash
#
# # POSITION
# p entry.position # => String
# p entry.chromosome # => String
# p entry.gbposition # => String
@@ -50,10 +50,13 @@
# p entry.motif # => Hash of Array
#
# # DBLINKS
# p entry.dblinks # => Hash of Array
#
+# # STRUCTURE
+# p entry.structure # => Array
+#
# # CODON_USAGE
# p entry.codon_usage # => Hash
# p entry.cu_list # => Array
#
# # AASEQ
@@ -71,22 +74,57 @@
autoload :KEGGDB, 'bio/db'
autoload :Locations, 'bio/location'
autoload :Sequence, 'bio/sequence'
+ require 'bio/db/kegg/common'
+
class KEGG
+# == Description
+#
+# KEGG GENES entry parser.
+#
+# == References
+#
+# * http://www.genome.jp/kegg/genes.html
+#
class GENES < KEGGDB
DELIMITER = RS = "\n///\n"
TAGSIZE = 12
+ include Common::DblinksAsHash
+ # Returns a Hash of the DB name and an Array of entry IDs in DBLINKS field.
+ def dblinks_as_hash; super; end if false #dummy for RDoc
+ alias dblinks dblinks_as_hash
+
+ include Common::PathwaysAsHash
+ # Returns a Hash of the pathway ID and name in PATHWAY field.
+ def pathways_as_hash; super; end if false #dummy for RDoc
+ alias pathways pathways_as_hash
+
+ include Common::OrthologsAsHash
+ # Returns a Hash of the orthology ID and definition in ORTHOLOGY field.
+ def orthologs_as_hash; super; end if false #dummy for RDoc
+ alias orthologs orthologs_as_hash
+
+ # Creates a new Bio::KEGG::GENES object.
+ # ---
+ # *Arguments*:
+ # * (required) _entry_: (String) single entry as a string
+ # *Returns*:: Bio::KEGG::GENES object
def initialize(entry)
# Pass the raw entry string up to the KEGGDB superclass together with
# this class's TAGSIZE constant.
super(entry, TAGSIZE)
end
-
+ # Returns the "ENTRY" line content as a Hash.
+ # For example,
+ # {"organism"=>"E.coli", "division"=>"CDS", "id"=>"b0356"}
+ #
+ # ---
+ # *Returns*:: Hash
def entry
unless @data['ENTRY']
hash = Hash.new('')
if get('ENTRY').length > 30
e = get('ENTRY')
@@ -97,84 +135,135 @@
@data['ENTRY'] = hash
end
@data['ENTRY']
end
+ # ID of the entry, described in the ENTRY line.
+ # ---
+ # *Returns*:: String
def entry_id
  # The ENTRY hash keeps the identifier under the 'id' key.
  entry_fields = entry
  entry_fields['id']
end
+ # Division of the entry, described in the ENTRY line.
+ # ---
+ # *Returns*:: String
def division
  # Division label from the ENTRY hash (CDS, tRNA etc.).
  entry_fields = entry
  entry_fields['division']
end
+ # Organism name of the entry, described in the ENTRY line.
+ # ---
+ # *Returns*:: String
def organism
  # Organism label from the ENTRY hash (H.sapiens etc.).
  entry_fields = entry
  entry_fields['organism']
end
+ # Returns the NAME line.
+ # ---
+ # *Returns*:: String
def name
  # Delegates to field_fetch for the NAME tag.
  tag = 'NAME'
  field_fetch(tag)
end
+ # Names of the entry as an Array, described in the NAME line.
+ #
+ # ---
+ # *Returns*:: Array containing String
def genes
  # Entries in the NAME line are separated by a comma followed by a space.
  separator = ', '
  name.split(separator)
end
+ # Returns the first gene name described in the NAME line.
+ # ---
+ # *Returns*:: String
def gene
  # First element of the gene name list; nil when the list is empty.
  genes[0]
end
+ # Definition of the entry, described in the DEFINITION line.
+ # ---
+ # *Returns*:: String
def definition
  # Delegates to field_fetch for the DEFINITION tag.
  tag = 'DEFINITION'
  field_fetch(tag)
end
+ # Enzyme's EC numbers shown in the DEFINITION line.
+ # ---
+ # *Returns*:: Array containing String
def eclinks
  # Capture the text inside the first "[EC:...]" bracket of the definition.
  bracketed = definition[/\[EC:(.*?)\]/, 1]
  # No EC bracket present: report an empty list.
  return [] unless bracketed

  # EC numbers inside the bracket are whitespace-separated.
  bracketed.strip.split(/\s+/)
end
- def orthologs
+ # Orthologs described in the ORTHOLOGY lines.
+ # ---
+ # *Returns*:: Array containing String
+ def orthologs_as_strings
lines_fetch('ORTHOLOGY')
end
+ # Returns the PATHWAY lines as a String.
+ # ---
+ # *Returns*:: String
def pathway
  # Delegates to field_fetch for the PATHWAY tag.
  tag = 'PATHWAY'
  field_fetch(tag)
end
- def pathways
- pathway.scan(/\[PATH:(.*?)\]/).flatten
+ # Pathways described in the PATHWAY lines.
+ # ---
+ # *Returns*:: Array containing String
+ def pathways_as_strings
+ lines_fetch('PATHWAY')
end
+ # The position in the genome described in the POSITION line.
+ # ---
+ # *Returns*:: String
def position
  # Cache the POSITION text with every whitespace character removed.
  @data['POSITION'] ||= fetch('POSITION').gsub(/\s/, '')
end
+ # Chromosome described in the POSITION line.
+ # ---
+ # *Returns*:: String or nil
def chromosome
  pos = position
  # "chr:location" form: everything before the first colon is the chromosome.
  return pos.sub(/:.*/, '') if pos[/:/]

  # Without a colon, a value containing a ".." range is a bare location
  # (no chromosome); anything else is returned as the chromosome itself.
  pos[/\.\./] ? nil : pos
end
+ # The position in the genome described in the POSITION line
+ # as GenBank feature table location formatted string.
+ # ---
+ # *Returns*:: String
def gbposition
  # Drop the shortest leading run that ends with a colon (the part before
  # the location); the text is unchanged when there is no colon.
  leading_prefix = /.*?:/
  position.sub(leading_prefix, '')
end
+ # The position in the genome described in the POSITION line
+ # as Bio::Locations object.
+ # ---
+ # *Returns*:: Bio::Locations object
def locations
  # Build a Bio::Locations from the location string returned by gbposition.
  location_text = gbposition
  Bio::Locations.new(location_text)
end
+ # Motif information described in the MOTIF lines.
+ # ---
+ # *Returns*:: Hash
def motif
unless @data['MOTIF']
hash = {}
db = nil
lines_fetch('MOTIF').each do |line|
@@ -189,22 +278,31 @@
@data['MOTIF'] = hash
end
@data['MOTIF'] # Hash of Array of IDs in MOTIF
end
- def dblinks
- unless @data['DBLINKS']
- hash = {}
- get('DBLINKS').scan(/(\S+):\s*(.*)\n?/).each do |db, str|
- id_array = str.strip.split(/\s+/)
- hash[db] = id_array
- end
- @data['DBLINKS'] = hash
+ # Links to other databases described in the DBLINKS lines.
+ # ---
+ # *Returns*:: Array containing String objects
+ def dblinks_as_strings
+ lines_fetch('DBLINKS')
+ end
+
+ # Returns structure ID information described in the STRUCTURE lines.
+ # ---
+ # *Returns*:: Array containing String
+ def structure
+ unless @data['STRUCTURE']
+ @data['STRUCTURE'] = fetch('STRUCTURE').sub(/(PDB: )*/,'').split(/\s+/)
end
- @data['DBLINKS'] # Hash of Array of IDs in DBLINKS
+ @data['STRUCTURE'] # whitespace-split IDs with any leading 'PDB: ' removed, e.g. ['1A9X', ...]
end
+ alias structures structure
+ # Codon usage data described in the CODON_USAGE lines.
+ # ---
+ # *Returns*:: Hash
def codon_usage(codon = nil)
unless @data['CODON_USAGE']
hash = Hash.new
list = cu_list
base = %w(t c a g)
@@ -218,38 +316,53 @@
@data['CODON_USAGE'] = hash
end
@data['CODON_USAGE']
end
+ # Codon usage data described in the CODON_USAGE lines as an array.
+ # ---
+ # *Returns*:: Array
def cu_list
  counts = []
  # Blank out the first line's text; its newline stays, so the first
  # iteration below sees an empty line and contributes nothing.
  table = get('CODON_USAGE').sub(/.*/, '')
  table.each_line do |row|
    # Skip the first 11 characters of the row, then read 4-character fields.
    fields = row.chomp.sub(/^.{11}/, '').scan(/..../)
    counts.concat(fields.map { |field| field.to_i })
  end
  counts
end
+ # Returns amino acid sequence described in the AASEQ lines.
+ # ---
+ # *Returns*:: Bio::Sequence::AA object
def aaseq
  # Build the amino acid sequence once, with every run of digits removed
  # from the AASEQ text, and cache it.
  @data['AASEQ'] ||= Bio::Sequence::AA.new(fetch('AASEQ').gsub(/\d+/, ''))
end
+ # Returns length of the amino acid sequence described in the AASEQ lines.
+ # ---
+ # *Returns*:: Integer
def aalen
  # The first run of digits in the AASEQ text is read as the length;
  # when there are no digits the result is 0 (nil.to_i).
  length_field = fetch('AASEQ').slice(/\d+/)
  length_field.to_i
end
+ # Returns nucleic acid sequence described in the NTSEQ lines.
+ # ---
+ # *Returns*:: Bio::Sequence::NA object
def ntseq
  # Build the nucleic acid sequence once, with every run of digits removed
  # from the NTSEQ text, and cache it.
  @data['NTSEQ'] ||= Bio::Sequence::NA.new(fetch('NTSEQ').gsub(/\d+/, ''))
end
alias naseq ntseq
+ # Returns nucleic acid sequence length.
+ # ---
+ # *Returns*:: Integer
def ntlen
  # The first run of digits in the NTSEQ text is read as the length;
  # when there are no digits the result is 0 (nil.to_i).
  length_field = fetch('NTSEQ').slice(/\d+/)
  length_field.to_i
end
alias nalen ntlen