lib/bio/io/sql.rb in bio-1.2.1 vs lib/bio/io/sql.rb in bio-1.3.0
- old
+ new
@@ -1,365 +1,186 @@
-#
-# = bio/io/sql.rb - BioSQL access module
-#
-# Copyright:: Copyright (C) 2002 Toshiaki Katayama <k@bioruby.org>
-# Copyright:: Copyright (C) 2006 Raoul Jean Pierre Bonnal <raoul.bonnal@itb.cnr.it>
-# License:: The Ruby License
-#
-# $Id: sql.rb,v 1.8 2007/04/05 23:35:41 trevor Exp $
-#
-begin
- require 'dbi'
-rescue LoadError
-end
-require 'bio/sequence'
-require 'bio/feature'
+require 'rubygems'
+require 'erb'
+require 'composite_primary_keys'
+# BiosqlPlug
+=begin
+Ok Hilmar gives to me some clarification
+1) "EMBL/GenBank/SwissProt" name in term table, is only a convention assuming data loaded by genbank embl ans swissprot formats.
+ If your features come from others ways for example blast or alignment ... whatever.. the user as to take care about the source.
-module Bio
-class SQL
-
- def initialize(db = 'dbi:Mysql:biosql', user = nil, pass = nil)
- @dbh = DBI.connect(db, user, pass)
- end
-
- def close
- @dbh.disconnect
- end
-
- # Returns Bio::SQL::Sequence object.
- def fetch(accession) # or display_id for fall back
- query = "select * from bioentry where accession = ?"
- entry = @dbh.execute(query, accession).fetch
- return Sequence.new(@dbh, entry) if entry
-
- query = "select * from bioentry where display_id = ?"
- entry = @dbh.execute(query, accession).fetch
- return Sequence.new(@dbh, entry) if entry
- end
- alias get_by_id fetch
-
-
- # for lazy fetching
-
- class Sequence
-
- def initialize(dbh, entry)
- @dbh = dbh
- @bioentry_id = entry['bioentry_id']
- @database_id = entry['biodatabase_id']
- @entry_id = entry['display_id']
- @accession = entry['accession']
- @version = entry['entry_version']
- @division = entry['division']
+=end
+=begin
+TODO:
+1) source_term_id => surce_term and check before if the source term is present or not and the level, the root should always be something "EMBL/GenBank/SwissProt" or contestualized.
+2) Into DummyBase class delete connection there and use Bio::ArSQL.establish_connection which reads info from a yml file.
+3) Chk Locations in Biofeatures ArSQL
+=end
+module Bio
+ class SQL
+ #no check is made
+ def self.establish_connection(configurations, env)
+ #configurations is an hash similar what YAML returns.
+ #{:database=>"biorails_development", :adapter=>"postgresql", :username=>"rails", :password=>nil}
+ configurations.assert_valid_keys('development', 'production','test')
+ configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
+ DummyBase.configurations = configurations
+ DummyBase.establish_connection "#{env}"
end
- attr_reader :accession, :division, :entry_id, :version
-
-
- def to_fasta
- if seq = seq
- return seq.to_fasta(@accession)
- end
+
+ def self.fetch_id(id)
+ Bio::SQL::Bioentry.find(id)
end
-
- # Returns Bio::Sequence::NA or AA object.
- def seq
- query = "select * from biosequence where bioentry_id = ?"
- row = @dbh.execute(query, @bioentry_id).fetch
- return unless row
-
- mol = row['alphabet']
- seq = row['seq']
-
- case mol
- when /.na/i # 'dna' or 'rna'
- Bio::Sequence::NA.new(seq)
- else # 'protein'
- Bio::Sequence::AA.new(seq)
- end
+
+ def self.fetch_accession(accession)
+ accession = accession.upcase
+ Bio::SQL::Bioentry.exists?(:accession => accession) ? Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession)) : nil
end
-
- # Returns Bio::Sequence::NA or AA object (by lazy fetching).
- def subseq(from, to)
- length = to - from + 1
- query = "select alphabet, substring(seq, ?, ?) as subseq" +
- " from biosequence where bioentry_id = ?"
- row = @dbh.execute(query, from, length, @bioentry_id).fetch
- return unless row
-
- mol = row['alphabet']
- seq = row['subseq']
-
- case mol
- when /.na/i # 'dna' or 'rna'
- Bio::Sequence::NA.new(seq)
- else # 'protein'
- Bio::Sequence::AA.new(seq)
- end
+
+ def self.exists_accession(accession)
+ Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil? ? false : true
end
-
-
- # Returns Bio::Features object.
- def features
- array = []
- query = "select * from seqfeature where bioentry_id = ?"
- @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
- next unless row
-
- f_id = row['seqfeature_id']
- k_id = row['type_term_id']
- s_id = row['source_term_id']
- rank = row['rank'].to_i - 1
-
- # key : type (gene, CDS, ...)
- type = feature_key(k_id)
-
- # source : database (EMBL/GenBank/SwissProt)
- database = feature_source(s_id)
-
- # location : position
- locations = feature_locations(f_id)
-
- # qualifier
- qualifiers = feature_qualifiers(f_id)
-
- # rank
- array[rank] = Bio::Feature.new(type, locations, qualifiers)
- end
- return Bio::Features.new(array)
+
+ def self.exists_database(name)
+ Bio::SQL::Biodatabase.find_by_name(name).nil? ? false : true
end
-
-
- # Returns reference informations in Array of Hash (not Bio::Reference).
- def references
- array = []
- query = <<-END
- select * from bioentry_reference, reference
- where bioentry_id = ? and
- bioentry_reference.reference_id = reference.reference_id
- END
- @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
- next unless row
-
- hash = {
- 'start' => row['start_pos'],
- 'end' => row['end_pos'],
- 'journal' => row['location'],
- 'title' => row['title'],
- 'authors' => row['authors'],
- 'medline' => row['crc']
- }
- hash.default = ''
-
- rank = row['rank'].to_i - 1
- array[rank] = hash
- end
- return array
+
+ def self.list_entries
+ Bio::SQL::Bioentry.find(:all).collect{|entry|
+ {:id=>entry.bioentry_id, :accession=>entry.accession}
+ }
end
-
-
- # Returns the first comment. For complete comments, use comments method.
- def comment
- query = "select * from comment where bioentry_id = ?"
- row = @dbh.execute(query, @bioentry_id).fetch
- row ? row['comment_text'] : ''
+
+ def self.list_databases
+ Bio::SQL::Biodatabase.find(:all).collect{|entry|
+ {:id=>entry.biodatabase_id, :name => entry.name}
+ }
end
-
- # Returns comments in an Array of Strings.
- def comments
- array = []
- query = "select * from comment where bioentry_id = ?"
- @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
- next unless row
- rank = row['rank'].to_i - 1
- array[rank] = row['comment_text']
- end
- return array
+
+ def self.delete_entry_id(id)
+ Bioentry.delete(id)
end
-
- def database
- query = "select * from biodatabase where biodatabase_id = ?"
- row = @dbh.execute(query, @database_id).fetch
- row ? row['name'] : ''
+
+ def self.delete_entry_accession(accession)
+ Bioentry.delete(Bioentry.find_by_accession(accession))
end
+
+
+ class DummyBase < ActiveRecord::Base
+ #NOTE: Using postgresql, not setting sequence name, system will discover the name by default.
+ #NOTE: this class will not establish the connection automatically
+ self.abstract_class = true
+ self.pluralize_table_names = false
+ #prepend table name to the usual id, avoid to specify primary id for every table
+ self.primary_key_prefix_type = :table_name_with_underscore
+ #biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'../config', 'database.yml'))).result)
+ #self.configurations=biosql_configurations
+ #self.establish_connection "development"
+ end #DummyBase
+
+ autoload :Biodatabase, 'bio/io/biosql/biodatabase'
+ autoload :Bioentry, 'bio/io/biosql/bioentry'
+ autoload :BioentryDbxref, 'bio/io/biosql/bioentry_dbxref'
+ autoload :BioentryPath, 'bio/io/biosql/bioentry_path'
+ autoload :BioentryQualifierValue, 'bio/io/biosql/bioentry_qualifier_value'
+ autoload :BioentryReference, 'bio/io/biosql/bioentry_reference'
+ autoload :BioentryRelationship, 'bio/io/biosql/bioentry_relationship'
+ autoload :Biosequence, 'bio/io/biosql/biosequence'
+ autoload :Comment, 'bio/io/biosql/comment'
+ autoload :Dbxref, 'bio/io/biosql/dbxref'
+ autoload :DbxrefQualifierValue, 'bio/io/biosql/dbxref_qualifier_value'
+ autoload :Location, 'bio/io/biosql/location'
+ autoload :LocationQualifierValue, 'bio/io/biosql/location_qualifier_value'
+ autoload :Ontology, 'bio/io/biosql/ontology'
+ autoload :Reference, 'bio/io/biosql/reference'
+ autoload :Seqfeature, 'bio/io/biosql/seqfeature'
+ autoload :SeqfeatureDbxref, 'bio/io/biosql/seqfeature_dbxref'
+ autoload :SeqfeaturePath, 'bio/io/biosql/seqfeature_path'
+ autoload :SeqfeatureQualifierValue, 'bio/io/biosql/seqfeature_qualifier_value'
+ autoload :SeqfeatureRelationship, 'bio/io/biosql/seqfeature_relationship'
+ autoload :Taxon, 'bio/io/biosql/taxon'
+ autoload :TaxonName, 'bio/io/biosql/taxon_name'
+ autoload :Term, 'bio/io/biosql/term'
+ autoload :TermDbxref, 'bio/io/biosql/term_dbxref'
+ autoload :TermPath, 'bio/io/biosql/term_path'
+ autoload :TermRelationship, 'bio/io/biosql/term_relationship'
+ autoload :TermRelationshipTerm, 'bio/io/biosql/term_relationship_term'
+ autoload :Sequence, 'bio/db/biosql/sequence'
+ end #biosql
+
+end #Bio
- def date
- query = "select * from bioentry_date where bioentry_id = ?"
- row = @dbh.execute(query, @bioentry_id).fetch
- row ? row['date'] : ''
- end
+if __FILE__ == $0
+ require 'rubygems'
+ require 'composite_primary_keys'
+ require 'bio'
+ require 'pp'
+
+ # pp connection = Bio::SQL.establish_connection('bio/io/biosql/config/database.yml','development')
+ connection = Bio::SQL.establish_connection({'development'=>{'database'=>"bio_test", 'adapter'=>"postgresql", 'username'=>"rails", 'password'=>nil}},'development')
+ #pp YAML::load(ERB.new(IO.read('bio/io/biosql/config/database.yml')).result)
+ if true
+ #Bio::SQL.list_entries
- def dblink
- query = "select * from bioentry_direct_links where source_bioentry_id = ?"
- row = @dbh.execute(query, @bioentry_id).fetch
- row ? [row['dbname'], row['accession']] : []
- end
+# biosequence = data.to_biosequence
+# puts biosequence.output(:genbank)
+ db=Bio::SQL::Biodatabase.new(:name=>'JEFF', :authority=>'ME', :description=>'YOU')
+ db.save!
- def definition
- query = "select * from bioentry_description where bioentry_id = ?"
- row = @dbh.execute(query, @bioentry_id).fetch
- row ? row['description'] : ''
+ puts "### FileFile.auto"
+ if ARGV.size > 0
+ #embl = Bio::FlatFile.auto(ARGF.read)
+ Bio::FlatFile.auto(ARGF) do |ff|
+ ff.each do |data|
+ biosequence=data.to_biosequence
+ puts biosequence.output(:fasta)
+ sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
+ sqlseq.save
+ sqlseq.to_biosequence.output(:fasta)
+ end
+ end
+ else
+ require 'bio/io/fetch'
+ server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
+ data = Bio::EMBL.new(server.fetch('embl','AJ224123'))
end
- def keyword
- query = "select * from bioentry_keywords where bioentry_id = ?"
- row = @dbh.execute(query, @bioentry_id).fetch
- row ? row['keywords'] : ''
- end
+
+# sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
+# sqlseq.save
+# sqlseq_bioseq=sqlseq.to_biosequence
+# puts sqlseq_bioseq.output(:genbank)
- # Use lineage, common_name, ncbi_taxa_id methods to extract in detail.
- def taxonomy
- query = <<-END
- select taxon_name.name, taxon.ncbi_taxon_id from bioentry
- join taxon_name using(taxon_id) join taxon using (taxon_id)
- where bioentry_id = ?
- END
- row = @dbh.execute(query, @bioentry_id).fetch
-# @lineage = row ? row['full_lineage'] : ''
- @common_name = row ? row['name'] : ''
- @ncbi_taxa_id = row ? row['ncbi_taxon_id'] : ''
- row ? [@lineage, @common_name, @ncbi_taxa_id] : []
- end
- def lineage
- taxonomy unless @lineage
- return @lineage
- end
- def common_name
- taxonomy unless @common_name
- return @common_name
- end
-
- def ncbi_taxa_id
- taxonomy unless @ncbi_taxa_id
- return @ncbi_taxa_id
- end
-
-
- private
-
- def feature_key(k_id)
- query = "select * from term where term_id= ?"
- row = @dbh.execute(query, k_id).fetch
- row ? row['name'] : ''
- end
-
- def feature_source(s_id)
- query = "select * from term where term_id = ?"
- row = @dbh.execute(query, s_id).fetch
- row ? row['name'] : ''
- end
-
- def feature_locations(f_id)
- locations = []
- query = "select * from location where seqfeature_id = ?"
- @dbh.execute(query, f_id).fetch_all.each do |row|
- next unless row
-
- location = Bio::Location.new
- location.strand = row['strand']
- location.from = row['start_pos']
- location.to = row['end_pos']
-
- xref = feature_locations_remote(row['dbxref_if'])
- location.xref_id = xref.shift unless xref.empty?
-
- # just omit fuzzy location for now...
- #feature_locations_qv(row['seqfeature_location_id'])
-
- rank = row['rank'].to_i - 1
- locations[rank] = location
- end
- return Bio::Locations.new(locations)
- end
-
- def feature_locations_remote(l_id)
- query = "select * from dbxref where dbxref_id = ?"
- row = @dbh.execute(query, l_id).fetch
- row ? [row['accession'], row['version']] : []
- end
-
- def feature_locations_qv(l_id)
- query = "select * from location_qualifier_value where location_id = ?"
- row = @dbh.execute(query, l_id).fetch
- row ? [row['value'], row['int_value']] : []
- end
-
- def feature_qualifiers(f_id)
- qualifiers = []
- query = "select * from seqfeature_qualifier_value where seqfeature_id = ?"
- @dbh.execute(query, f_id).fetch_all.each do |row|
- next unless row
-
- key = feature_qualifiers_key(row['seqfeature_id'])
- value = row['value']
- qualifier = Bio::Feature::Qualifier.new(key, value)
-
- rank = row['rank'].to_i - 1
- qualifiers[rank] = qualifier
- end
- return qualifiers.compact # .compact is nasty hack for a while
- end
-
- def feature_qualifiers_key(q_id)
- query = <<-END
- select * from seqfeature_qualifier_value
- join term using(term_id) where seqfeature_id = ?
- END
- row = @dbh.execute(query, q_id).fetch
- row ? row['name'] : ''
- end
+ # bioseq = Bio::SQL.fetch_accession('AJ224122')
+ # pp bioseq
+ # pp bioseq.entry_id
+ #TODO create a test only for tables not sequence here
+# pp bioseq.molecule_type
+ #pp bioseq.molecule_type.class
+ #bioseq.molecule_type_update('dna', 1)
+## pp Bio::SQL::Taxon.find(8121).taxon_names
+
+ #sqlseq.to_biosequence
+
+# sqlseq.delete
+
+# db.destroy
end
-
-end # SQL
-
-end # Bio
-
-
-if __FILE__ == $0
- begin
- require 'pp'
- alias p pp
- rescue LoadError
- end
-
- db = ARGV.empty? ? 'dbi:Mysql:database=biosql;host=localhost' : ARGV.shift
- serv = Bio::SQL.new(db, 'root')
-
- ent0 = serv.fetch('X76706')
- ent0 = serv.fetch('A15H9FIB')
- ent1 = serv.fetch('J01902')
- ent2 = serv.fetch('X04311')
-
- pp ent0.features
- pp ent0.references
-
- pp ent1.seq
- pp ent1.seq.translate
- pp ent1.seq.gc
- pp ent1.subseq(1,20)
-
- pp ent2.accession
- pp ent2.comment
- pp ent2.comments
- pp ent2.common_name
- pp ent2.database
- pp ent2.date
- pp ent2.dblink
- pp ent2.definition
- pp ent2.division
- pp ent2.entry_id
- pp ent2.features
- pp ent2.keyword
- pp ent2.lineage
- pp ent2.ncbi_taxa_id
- pp ent2.references
- pp ent2.seq
- pp ent2.subseq(1,10)
- pp ent2.taxonomy
- pp ent2.version
-
+ #pp bioseq.molecule_type
+ #term = Bio::SQL::Term.find_by_name('mol_type')
+ #pp term
+ #pp bioseq.entry.bioentry_qualifier_values.create(:term=>term, :rank=>2, :value=>'pippo')
+ #pp bioseq.entry.bioentry_qualifier_values.inspect
+ #pp bioseq.entry.bioentry_qualifier_values.find_all_by_term_id(26)
+ #pp primo.class
+ # pp primo.value='dna'
+ # pp primo.save
+ #pp bioseq.molecule_type= 'prova'
+
+ #Bio::SQL::BioentryQualifierValue.delete(delete.bioentry_id,delete.term_id,delete.rank)
+
+
end
-