lib/bio/io/sql.rb in bio-1.0.0 vs lib/bio/io/sql.rb in bio-1.1.0
- old
+ new
@@ -1,26 +1,14 @@
#
-# bio/io/sql.rb - BioSQL access module
+# = bio/io/sql.rb - BioSQL access module
#
-# Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
+# Copyright:: Copyright (C) 2002 Toshiaki Katayama <k@bioruby.org>
+# Copyright:: Copyright (C) 2006 Raoul Jean Pierre Bonnal <raoul.bonnal@itb.cnr.it>
+# License:: The Ruby License
#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2 of the License, or (at your option) any later version.
+# $Id: sql.rb,v 1.8 2007/04/05 23:35:41 trevor Exp $
#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
-#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-#
-# $Id: sql.rb,v 1.4 2005/09/26 13:04:28 k Exp $
-#
begin
require 'dbi'
rescue LoadError
end
@@ -28,299 +16,310 @@
require 'bio/feature'
module Bio
- class SQL
+class SQL
- def initialize(db = 'dbi:Mysql:biosql', user = nil, pass = nil)
- @dbh = DBI.connect(db, user, pass)
- end
+ def initialize(db = 'dbi:Mysql:biosql', user = nil, pass = nil)
+ @dbh = DBI.connect(db, user, pass)
+ end
- def close
- @dbh.disconnect
- end
+ def close
+ @dbh.disconnect
+ end
- def fetch(accession) # or display_id for fall back
- query = "select * from bioentry where accession = ?"
- entry = @dbh.execute(query, accession).fetch
- return Sequence.new(@dbh, entry) if entry
+ # Returns Bio::SQL::Sequence object.
+ def fetch(accession) # or display_id for fall back
+ query = "select * from bioentry where accession = ?"
+ entry = @dbh.execute(query, accession).fetch
+ return Sequence.new(@dbh, entry) if entry
- query = "select * from bioentry where display_id = ?"
- entry = @dbh.execute(query, accession).fetch
- return Sequence.new(@dbh, entry) if entry
- end
- alias get_by_id fetch
+ query = "select * from bioentry where display_id = ?"
+ entry = @dbh.execute(query, accession).fetch
+ return Sequence.new(@dbh, entry) if entry
+ end
+ alias get_by_id fetch
- # for lazy fetching
+ # for lazy fetching
- class Sequence
+ class Sequence
- def initialize(dbh, entry)
- @dbh = dbh
- @bioentry_id = entry['bioentry_id']
- @database_id = entry['biodatabase_id']
- @entry_id = entry['display_id']
- @accession = entry['accession']
- @version = entry['entry_version']
- @division = entry['division']
- end
- attr_reader :accession, :division, :entry_id, :version
+ def initialize(dbh, entry)
+ @dbh = dbh
+ @bioentry_id = entry['bioentry_id']
+ @database_id = entry['biodatabase_id']
+ @entry_id = entry['display_id']
+ @accession = entry['accession']
+ @version = entry['entry_version']
+ @division = entry['division']
+ end
+ attr_reader :accession, :division, :entry_id, :version
- def to_fasta
- if seq = seq
- return seq.to_fasta(@accession)
- end
+ def to_fasta
+ if seq = seq
+ return seq.to_fasta(@accession)
end
+ end
- def seq
- query = "select * from biosequence where bioentry_id = ?"
- row = @dbh.execute(query, @bioentry_id).fetch
- return unless row
+ # Returns Bio::Sequence::NA or AA object.
+ def seq
+ query = "select * from biosequence where bioentry_id = ?"
+ row = @dbh.execute(query, @bioentry_id).fetch
+ return unless row
- mol = row['molecule']
- seq = row['biosequence_str']
+ mol = row['alphabet']
+ seq = row['seq']
- case mol
- when /.na/i # 'dna' or 'rna'
- Bio::Sequence::NA.new(seq)
- else # 'protein'
- Bio::Sequence::AA.new(seq)
- end
+ case mol
+ when /.na/i # 'dna' or 'rna'
+ Bio::Sequence::NA.new(seq)
+ else # 'protein'
+ Bio::Sequence::AA.new(seq)
end
+ end
- def subseq(from, to)
- length = to - from + 1
- query = "select molecule, substring(biosequence_str, ?, ?) as subseq" +
- " from biosequence where bioentry_id = ?"
- row = @dbh.execute(query, from, length, @bioentry_id).fetch
- return unless row
+ # Returns Bio::Sequence::NA or AA object (by lazy fetching).
+ def subseq(from, to)
+ length = to - from + 1
+ query = "select alphabet, substring(seq, ?, ?) as subseq" +
+ " from biosequence where bioentry_id = ?"
+ row = @dbh.execute(query, from, length, @bioentry_id).fetch
+ return unless row
- mol = row['molecule']
- seq = row['subseq']
+ mol = row['alphabet']
+ seq = row['subseq']
- case mol
- when /.na/i # 'dna' or 'rna'
- Bio::Sequence::NA.new(seq)
- else # 'protein'
- Bio::Sequence::AA.new(seq)
- end
+ case mol
+ when /.na/i # 'dna' or 'rna'
+ Bio::Sequence::NA.new(seq)
+ else # 'protein'
+ Bio::Sequence::AA.new(seq)
end
+ end
- def features
- array = []
- query = "select * from seqfeature where bioentry_id = ?"
- @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
- next unless row
+ # Returns Bio::Features object.
+ def features
+ array = []
+ query = "select * from seqfeature where bioentry_id = ?"
+ @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
+ next unless row
- f_id = row['seqfeature_id']
- k_id = row['seqfeature_key_id']
- s_id = row['seqfeature_source_id']
- rank = row['seqfeature_rank'].to_i - 1
+ f_id = row['seqfeature_id']
+ k_id = row['type_term_id']
+ s_id = row['source_term_id']
+ rank = row['rank'].to_i - 1
- # key : type (gene, CDS, ...)
- type = feature_key(k_id)
+ # key : type (gene, CDS, ...)
+ type = feature_key(k_id)
- # source : database (EMBL/GenBank/SwissProt)
- database = feature_source(s_id)
+ # source : database (EMBL/GenBank/SwissProt)
+ database = feature_source(s_id)
- # location : position
- locations = feature_locations(f_id)
+ # location : position
+ locations = feature_locations(f_id)
- # qualifier
- qualifiers = feature_qualifiers(f_id)
-
- # rank
- array[rank] = Bio::Feature.new(type, locations, qualifiers)
- end
- return Bio::Features.new(array)
+ # qualifier
+ qualifiers = feature_qualifiers(f_id)
+
+ # rank
+ array[rank] = Bio::Feature.new(type, locations, qualifiers)
end
+ return Bio::Features.new(array)
+ end
- def references
- array = []
- query = <<-END
- select * from bioentry_reference, reference
- where bioentry_id = ? and
- bioentry_reference.reference_id = reference.reference_id
- END
- @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
- next unless row
+ # Returns reference informations in Array of Hash (not Bio::Reference).
+ def references
+ array = []
+ query = <<-END
+ select * from bioentry_reference, reference
+ where bioentry_id = ? and
+ bioentry_reference.reference_id = reference.reference_id
+ END
+ @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
+ next unless row
- hash = {
- 'start' => row['reference_start'],
- 'end' => row['reference_end'],
- 'journal' => row['reference_location'],
- 'title' => row['reference_title'],
- 'authors' => row['reference_authors'],
- 'medline' => row['reference_medline']
- }
- hash.default = ''
+ hash = {
+ 'start' => row['start_pos'],
+ 'end' => row['end_pos'],
+ 'journal' => row['location'],
+ 'title' => row['title'],
+ 'authors' => row['authors'],
+ 'medline' => row['crc']
+ }
+ hash.default = ''
- rank = row['reference_rank'].to_i - 1
- array[rank] = hash
- end
- return array
+ rank = row['rank'].to_i - 1
+ array[rank] = hash
end
+ return array
+ end
- def comment
- query = "select * from comment where bioentry_id = ?"
- row = @dbh.execute(query, @bioentry_id).fetch
- row ? row['comment_text'] : ''
- end
+ # Returns the first comment. For complete comments, use comments method.
+ def comment
+ query = "select * from comment where bioentry_id = ?"
+ row = @dbh.execute(query, @bioentry_id).fetch
+ row ? row['comment_text'] : ''
+ end
- def comments
- array = []
- query = "select * from comment where bioentry_id = ?"
- @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
- next unless row
- rank = row['comment_rank'].to_i - 1
- array[rank] = row['comment_text']
- end
- return array
+ # Returns comments in an Array of Strings.
+ def comments
+ array = []
+ query = "select * from comment where bioentry_id = ?"
+ @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
+ next unless row
+ rank = row['rank'].to_i - 1
+ array[rank] = row['comment_text']
end
+ return array
+ end
- def database
- query = "select * from biodatabase where biodatabase_id = ?"
- row = @dbh.execute(query, @database_id).fetch
- row ? row['name'] : ''
- end
+ def database
+ query = "select * from biodatabase where biodatabase_id = ?"
+ row = @dbh.execute(query, @database_id).fetch
+ row ? row['name'] : ''
+ end
- def date
- query = "select * from bioentry_date where bioentry_id = ?"
- row = @dbh.execute(query, @bioentry_id).fetch
- row ? row['date'] : ''
- end
+ def date
+ query = "select * from bioentry_date where bioentry_id = ?"
+ row = @dbh.execute(query, @bioentry_id).fetch
+ row ? row['date'] : ''
+ end
- def dblink
- query = "select * from bioentry_direct_links where source_bioentry_id = ?"
- row = @dbh.execute(query, @bioentry_id).fetch
- row ? [row['dbname'], row['accession']] : []
- end
+ def dblink
+ query = "select * from bioentry_direct_links where source_bioentry_id = ?"
+ row = @dbh.execute(query, @bioentry_id).fetch
+ row ? [row['dbname'], row['accession']] : []
+ end
- def definition
- query = "select * from bioentry_description where bioentry_id = ?"
- row = @dbh.execute(query, @bioentry_id).fetch
- row ? row['description'] : ''
- end
+ def definition
+ query = "select * from bioentry_description where bioentry_id = ?"
+ row = @dbh.execute(query, @bioentry_id).fetch
+ row ? row['description'] : ''
+ end
- def keyword
- query = "select * from bioentry_keywords where bioentry_id = ?"
- row = @dbh.execute(query, @bioentry_id).fetch
- row ? row['keywords'] : ''
- end
+ def keyword
+ query = "select * from bioentry_keywords where bioentry_id = ?"
+ row = @dbh.execute(query, @bioentry_id).fetch
+ row ? row['keywords'] : ''
+ end
- def taxonomy
- query = <<-END
- select full_lineage, common_name, ncbi_taxa_id
- from bioentry_taxa, taxa
- where bioentry_id = ? and bioentry_taxa.taxa_id = taxa.taxa_id
- END
- row = @dbh.execute(query, @bioentry_id).fetch
- @lineage = row ? row['full_lineage'] : ''
- @common_name = row ? row['common_name'] : ''
- @ncbi_taxa_id = row ? row['ncbi_taxa_id'] : ''
- row ? [@lineage, @common_name, @ncbi_taxa_id] : []
- end
+ # Use lineage, common_name, ncbi_taxa_id methods to extract in detail.
+ def taxonomy
+ query = <<-END
+ select taxon_name.name, taxon.ncbi_taxon_id from bioentry
+ join taxon_name using(taxon_id) join taxon using (taxon_id)
+ where bioentry_id = ?
+ END
+ row = @dbh.execute(query, @bioentry_id).fetch
+# @lineage = row ? row['full_lineage'] : ''
+ @common_name = row ? row['name'] : ''
+ @ncbi_taxa_id = row ? row['ncbi_taxon_id'] : ''
+ row ? [@lineage, @common_name, @ncbi_taxa_id] : []
+ end
- def lineage
- taxonomy unless @lineage
- return @lineage
- end
+ def lineage
+ taxonomy unless @lineage
+ return @lineage
+ end
- def common_name
- taxonomy unless @common_name
- return @common_name
- end
+ def common_name
+ taxonomy unless @common_name
+ return @common_name
+ end
- def ncbi_taxa_id
- taxonomy unless @ncbi_taxa_id
- return @ncbi_taxa_id
- end
+ def ncbi_taxa_id
+ taxonomy unless @ncbi_taxa_id
+ return @ncbi_taxa_id
+ end
- private
+ private
- def feature_key(k_id)
- query = "select * from seqfeature_key where seqfeature_key_id = ?"
- row = @dbh.execute(query, k_id).fetch
- row ? row['key_name'] : ''
- end
+ def feature_key(k_id)
+ query = "select * from term where term_id= ?"
+ row = @dbh.execute(query, k_id).fetch
+ row ? row['name'] : ''
+ end
- def feature_source(s_id)
- query = "select * from seqfeature_source where seqfeature_source_id = ?"
- row = @dbh.execute(query, s_id).fetch
- row ? row['source_name'] : ''
- end
+ def feature_source(s_id)
+ query = "select * from term where term_id = ?"
+ row = @dbh.execute(query, s_id).fetch
+ row ? row['name'] : ''
+ end
- def feature_locations(f_id)
- locations = []
- query = "select * from seqfeature_location where seqfeature_id = ?"
- @dbh.execute(query, f_id).fetch_all.each do |row|
- next unless row
+ def feature_locations(f_id)
+ locations = []
+ query = "select * from location where seqfeature_id = ?"
+ @dbh.execute(query, f_id).fetch_all.each do |row|
+ next unless row
- location = Bio::Location.new
- location.strand = row['seq_strand']
- location.from = row['seq_start']
- location.to = row['seq_end']
+ location = Bio::Location.new
+ location.strand = row['strand']
+ location.from = row['start_pos']
+ location.to = row['end_pos']
- xref = feature_locations_remote(row['seqfeature_location_id'])
- location.xref_id = xref.shift unless xref.empty?
+ xref = feature_locations_remote(row['dbxref_if'])
+ location.xref_id = xref.shift unless xref.empty?
- # just omit fuzzy location for now...
- #feature_locations_qv(row['seqfeature_location_id'])
+ # just omit fuzzy location for now...
+ #feature_locations_qv(row['seqfeature_location_id'])
- rank = row['location_rank'].to_i - 1
- locations[rank] = location
- end
- return Bio::Locations.new(locations)
+ rank = row['rank'].to_i - 1
+ locations[rank] = location
end
+ return Bio::Locations.new(locations)
+ end
- def feature_locations_remote(l_id)
- query = "select * from remote_seqfeature_name where seqfeature_location_id = ?"
- row = @dbh.execute(query, l_id).fetch
- row ? [row['accession'], row['version']] : []
- end
+ def feature_locations_remote(l_id)
+ query = "select * from dbxref where dbxref_id = ?"
+ row = @dbh.execute(query, l_id).fetch
+ row ? [row['accession'], row['version']] : []
+ end
- def feature_locations_qv(l_id)
- query = "select * from location_qualifier_value where seqfeature_location_id = ?"
- row = @dbh.execute(query, l_id).fetch
- row ? [row['qualifier_value'], row['slot_value']] : []
- end
+ def feature_locations_qv(l_id)
+ query = "select * from location_qualifier_value where location_id = ?"
+ row = @dbh.execute(query, l_id).fetch
+ row ? [row['value'], row['int_value']] : []
+ end
- def feature_qualifiers(f_id)
- qualifiers = []
- query = "select * from seqfeature_qualifier_value where seqfeature_id = ?"
- @dbh.execute(query, f_id).fetch_all.each do |row|
- next unless row
+ def feature_qualifiers(f_id)
+ qualifiers = []
+ query = "select * from seqfeature_qualifier_value where seqfeature_id = ?"
+ @dbh.execute(query, f_id).fetch_all.each do |row|
+ next unless row
- key = feature_qualifiers_key(row['seqfeature_qualifier_id'])
- value = row['qualifier_value']
- qualifier = Bio::Feature::Qualifier.new(key, value)
+ key = feature_qualifiers_key(row['seqfeature_id'])
+ value = row['value']
+ qualifier = Bio::Feature::Qualifier.new(key, value)
- rank = row['seqfeature_qualifier_rank'].to_i - 1
- qualifiers[rank] = qualifier
- end
- return qualifiers.compact # .compact is nasty hack for a while
+ rank = row['rank'].to_i - 1
+ qualifiers[rank] = qualifier
end
-
- def feature_qualifiers_key(q_id)
- query = "select * from seqfeature_qualifier where seqfeature_qualifier_id = ?"
- row = @dbh.execute(query, q_id).fetch
- row ? row['qualifier_name'] : ''
- end
+ return qualifiers.compact # .compact is nasty hack for a while
end
+ def feature_qualifiers_key(q_id)
+ query = <<-END
+ select * from seqfeature_qualifier_value
+ join term using(term_id) where seqfeature_id = ?
+ END
+ row = @dbh.execute(query, q_id).fetch
+ row ? row['name'] : ''
+ end
end
-end
+end # SQL
+end # Bio
+
if __FILE__ == $0
begin
require 'pp'
alias p pp
rescue LoadError
@@ -361,68 +360,6 @@
pp ent2.subseq(1,10)
pp ent2.taxonomy
pp ent2.version
end
-
-
-=begin
-
-= Bio::SQL
-
---- Bio::SQL.new(db = 'dbi:Mysql:biosql', user = nil, pass = nil)
-
---- Bio::SQL.close
-
---- Bio::SQL#fetch(accession)
-
- Returns Bio::SQL::Sequence object.
-
-== Bio::SQL::Sequence
-
---- Bio::SQL::Sequence.new(dbh, entry)
-
---- Bio::SQL::Sequence#accession -> String
---- Bio::SQL::Sequence#comment -> String
-
- Returns the first comment. For complete comments, use comments method.
-
---- Bio::SQL::Sequence#comments -> Array
-
- Returns comments in an Array of Strings.
-
---- Bio::SQL::Sequence#common_name -> String
---- Bio::SQL::Sequence#database -> String
---- Bio::SQL::Sequence#date -> String
---- Bio::SQL::Sequence#dblink -> Array
---- Bio::SQL::Sequence#definition -> String
---- Bio::SQL::Sequence#division -> String
---- Bio::SQL::Sequence#entry_id -> String
-
---- Bio::SQL::Sequence#features
-
- Returns Bio::Features object.
-
---- Bio::SQL::Sequence#keyword -> String
---- Bio::SQL::Sequence#lineage -> String
---- Bio::SQL::Sequence#ncbi_taxa_id -> String
-
---- Bio::SQL::Sequence#references -> Array
-
- Returns reference informations in Array of Hash (not Bio::Reference).
-
---- Bio::SQL::Sequence#seq
-
- Returns Bio::Sequence::NA or AA object.
-
---- Bio::SQL::Sequence#subseq(from, to)
-
- Returns Bio::Sequence::NA or AA object (by lazy fetching).
-
---- Bio::SQL::Sequence#taxonomy -> DBI::Row
-
- Use lineage, common_name, ncbi_taxa_id methods to extract in detail.
-
---- Bio::SQL::Sequence#version -> String
-
-=end