sql.rb in bio-1.3.0

- old
+ new
@@ -1,365 +1,186 @@
-#
-# = bio/io/sql.rb - BioSQL access module
-#
-# Copyright::  Copyright (C) 2002 Toshiaki Katayama <k@bioruby.org>
-# Copyright::  Copyright (C) 2006 Raoul Jean Pierre Bonnal <raoul.bonnal@itb.cnr.it>
-# License::    The Ruby License
-#
-# $Id: sql.rb,v 1.8 2007/04/05 23:35:41 trevor Exp $
-#
 
-begin
-  require 'dbi'
-rescue LoadError
-end
-require 'bio/sequence'
-require 'bio/feature'
+require 'rubygems'
+require 'erb'
+require 'composite_primary_keys'
+# BiosqlPlug
 
+=begin
+Ok Hilmar gives to me some clarification
+1) "EMBL/GenBank/SwissProt" name in term table, is only a convention assuming data loaded by genbank embl ans swissprot formats.
+   If your features come from others ways for example blast or alignment ... whatever.. the user as to take care about the source. 
 
-module Bio
 
-class SQL
-
-  def initialize(db = 'dbi:Mysql:biosql', user = nil, pass = nil)
-    @dbh = DBI.connect(db, user, pass)
-  end
-
-  def close
-    @dbh.disconnect
-  end
-
-  # Returns Bio::SQL::Sequence object.
-  def fetch(accession)	# or display_id for fall back
-    query = "select * from bioentry where accession = ?"
-    entry = @dbh.execute(query, accession).fetch
-    return Sequence.new(@dbh, entry) if entry
-
-    query = "select * from bioentry where display_id = ?"
-    entry = @dbh.execute(query, accession).fetch
-    return Sequence.new(@dbh, entry) if entry
-  end
-  alias get_by_id fetch
-
-
-  # for lazy fetching
-
-  class Sequence
-
-    def initialize(dbh, entry)
-      @dbh = dbh
-      @bioentry_id = entry['bioentry_id']
-      @database_id = entry['biodatabase_id']
-      @entry_id = entry['display_id']
-      @accession = entry['accession']
-      @version = entry['entry_version']
-      @division = entry['division']
+=end
+=begin
+TODO:
+1) source_term_id => surce_term and check before if the source term is present or not and the level, the root should always be something "EMBL/GenBank/SwissProt" or contestualized.
+2) Into DummyBase class delete connection there and use Bio::ArSQL.establish_connection which reads info from a yml file. 
+3) Chk Locations in Biofeatures ArSQL
+=end
+module Bio
+  class SQL	
+    #no check is made
+    def self.establish_connection(configurations, env)
+      #configurations is an hash similar what YAML returns.
+      #{:database=>"biorails_development", :adapter=>"postgresql", :username=>"rails", :password=>nil}
+      configurations.assert_valid_keys('development', 'production','test')
+      configurations[env].assert_valid_keys('hostname','database','adapter','username','password')
+      DummyBase.configurations = configurations
+      DummyBase.establish_connection "#{env}"
     end
-    attr_reader :accession, :division, :entry_id, :version
-
-
-    def to_fasta
-      if seq = seq
-        return seq.to_fasta(@accession)
-      end
+    
+    def self.fetch_id(id)
+      Bio::SQL::Bioentry.find(id)
     end
-
-    # Returns Bio::Sequence::NA or AA object.
-    def seq
-      query = "select * from biosequence where bioentry_id = ?"
-      row = @dbh.execute(query, @bioentry_id).fetch
-      return unless row
-
-      mol = row['alphabet']
-      seq = row['seq']
-
-      case mol
-      when /.na/i			# 'dna' or 'rna'
-        Bio::Sequence::NA.new(seq)
-      else				# 'protein'
-        Bio::Sequence::AA.new(seq)
-      end
+    
+    def self.fetch_accession(accession)
+      accession = accession.upcase
+      Bio::SQL::Bioentry.exists?(:accession => accession) ? Bio::SQL::Sequence.new(:entry=>Bio::SQL::Bioentry.find_by_accession(accession)) : nil			
     end
-
-    # Returns Bio::Sequence::NA or AA object (by lazy fetching).
-    def subseq(from, to)
-      length = to - from + 1
-      query = "select alphabet, substring(seq, ?, ?) as subseq" +
-              " from biosequence where bioentry_id = ?"
-      row = @dbh.execute(query, from, length, @bioentry_id).fetch
-      return unless row
-
-      mol = row['alphabet']
-      seq = row['subseq']
-
-      case mol
-      when /.na/i			# 'dna' or 'rna'
-        Bio::Sequence::NA.new(seq)
-      else				# 'protein'
-        Bio::Sequence::AA.new(seq)
-      end
+    
+    def self.exists_accession(accession)
+      Bio::SQL::Bioentry.find_by_accession(accession.upcase).nil? ? false : true
     end
-
-
-    # Returns Bio::Features object.
-    def features
-      array = []
-      query = "select * from seqfeature where bioentry_id = ?"
-      @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
-        next unless row
-
-        f_id = row['seqfeature_id']
-        k_id = row['type_term_id']
-        s_id = row['source_term_id']
-        rank = row['rank'].to_i - 1
-
-        # key : type (gene, CDS, ...)
-        type = feature_key(k_id)
-
-        # source : database (EMBL/GenBank/SwissProt)
-        database = feature_source(s_id)
-
-        # location : position
-        locations = feature_locations(f_id)
-
-        # qualifier
-        qualifiers = feature_qualifiers(f_id)
-
-        # rank
-        array[rank] = Bio::Feature.new(type, locations, qualifiers)
-      end
-      return Bio::Features.new(array)
+    
+    def self.exists_database(name)
+      Bio::SQL::Biodatabase.find_by_name(name).nil? ? false : true
     end
-
-
-    # Returns reference informations in Array of Hash (not Bio::Reference).
-    def references
-      array = []
-      query = <<-END
-        select * from bioentry_reference, reference
-        where bioentry_id = ? and
-        bioentry_reference.reference_id = reference.reference_id
-      END
-      @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
-        next unless row
-
-        hash = {
-          'start'	=> row['start_pos'],
-          'end'		=> row['end_pos'],
-          'journal'	=> row['location'],
-          'title'	=> row['title'],
-          'authors'	=> row['authors'],
-          'medline'	=> row['crc']
-        }
-        hash.default = ''
-
-        rank = row['rank'].to_i - 1
-        array[rank] = hash
-      end
-      return array
+    
+    def self.list_entries
+      Bio::SQL::Bioentry.find(:all).collect{|entry|
+        {:id=>entry.bioentry_id, :accession=>entry.accession}
+      }
     end
-
-
-    # Returns the first comment.  For complete comments, use comments method.
-    def comment
-      query = "select * from comment where bioentry_id = ?"
-      row = @dbh.execute(query, @bioentry_id).fetch
-      row ? row['comment_text'] : ''
+    
+    def self.list_databases
+      Bio::SQL::Biodatabase.find(:all).collect{|entry|
+        {:id=>entry.biodatabase_id, :name => entry.name}
+      }
     end
-
-    # Returns comments in an Array of Strings.
-    def comments
-      array = []
-      query = "select * from comment where bioentry_id = ?"
-      @dbh.execute(query, @bioentry_id).fetch_all.each do |row|
-        next unless row
-        rank = row['rank'].to_i - 1
-        array[rank] = row['comment_text']
-      end
-      return array
+    
+    def self.delete_entry_id(id)
+      Bioentry.delete(id)
     end
-
-    def database
-      query = "select * from biodatabase where biodatabase_id = ?"
-      row = @dbh.execute(query, @database_id).fetch
-      row ? row['name'] : ''
+    
+    def self.delete_entry_accession(accession)
+      Bioentry.delete(Bioentry.find_by_accession(accession))
     end
+    
+    
+    class DummyBase <  ActiveRecord::Base
+      #NOTE: Using postgresql, not setting sequence name, system will discover the name by default.
+      #NOTE: this class will not establish the connection automatically
+      self.abstract_class = true			
+      self.pluralize_table_names = false
+      #prepend table name to the usual id, avoid to specify primary id for every table
+      self.primary_key_prefix_type = :table_name_with_underscore
+      #biosql_configurations=YAML::load(ERB.new(IO.read(File.join(File.dirname(__FILE__),'../config', 'database.yml'))).result)
+      #self.configurations=biosql_configurations
+      #self.establish_connection "development"
+    end #DummyBase
+    
+    autoload :Biodatabase, 'bio/io/biosql/biodatabase'
+    autoload :Bioentry, 'bio/io/biosql/bioentry'
+    autoload :BioentryDbxref, 'bio/io/biosql/bioentry_dbxref'
+    autoload :BioentryPath, 'bio/io/biosql/bioentry_path'
+    autoload :BioentryQualifierValue, 'bio/io/biosql/bioentry_qualifier_value'
+    autoload :BioentryReference, 'bio/io/biosql/bioentry_reference'
+    autoload :BioentryRelationship, 'bio/io/biosql/bioentry_relationship'
+    autoload :Biosequence, 'bio/io/biosql/biosequence'
+    autoload :Comment, 'bio/io/biosql/comment'
+    autoload :Dbxref, 'bio/io/biosql/dbxref'
+    autoload :DbxrefQualifierValue, 'bio/io/biosql/dbxref_qualifier_value'
+    autoload :Location, 'bio/io/biosql/location'
+    autoload :LocationQualifierValue, 'bio/io/biosql/location_qualifier_value'
+    autoload :Ontology, 'bio/io/biosql/ontology'
+    autoload :Reference, 'bio/io/biosql/reference'
+    autoload :Seqfeature, 'bio/io/biosql/seqfeature'
+    autoload :SeqfeatureDbxref, 'bio/io/biosql/seqfeature_dbxref'
+    autoload :SeqfeaturePath, 'bio/io/biosql/seqfeature_path'
+    autoload :SeqfeatureQualifierValue, 'bio/io/biosql/seqfeature_qualifier_value'
+    autoload :SeqfeatureRelationship, 'bio/io/biosql/seqfeature_relationship'
+    autoload :Taxon, 'bio/io/biosql/taxon'
+    autoload :TaxonName, 'bio/io/biosql/taxon_name'
+    autoload :Term, 'bio/io/biosql/term'
+    autoload :TermDbxref, 'bio/io/biosql/term_dbxref'
+    autoload :TermPath, 'bio/io/biosql/term_path'
+    autoload :TermRelationship, 'bio/io/biosql/term_relationship'
+    autoload :TermRelationshipTerm, 'bio/io/biosql/term_relationship_term'
+    autoload :Sequence, 'bio/db/biosql/sequence'
+  end #biosql
+  
+end #Bio
 
-    def date
-      query = "select * from bioentry_date where bioentry_id = ?"
-      row = @dbh.execute(query, @bioentry_id).fetch
-      row ? row['date'] : ''
-    end
+if __FILE__ == $0
+  require 'rubygems'
+  require 'composite_primary_keys'
+  require 'bio'
+  require 'pp'
+  
+  #  pp connection = Bio::SQL.establish_connection('bio/io/biosql/config/database.yml','development')
+  connection = Bio::SQL.establish_connection({'development'=>{'database'=>"bio_test", 'adapter'=>"postgresql", 'username'=>"rails", 'password'=>nil}},'development')
+  #pp YAML::load(ERB.new(IO.read('bio/io/biosql/config/database.yml')).result)
+  if true
+    #Bio::SQL.list_entries
 
-    def dblink
-      query = "select * from bioentry_direct_links where source_bioentry_id = ?"
-      row = @dbh.execute(query, @bioentry_id).fetch
-      row ? [row['dbname'], row['accession']] : []
-    end
+#  	biosequence = data.to_biosequence
+#	puts biosequence.output(:genbank)
+	db=Bio::SQL::Biodatabase.new(:name=>'JEFF', :authority=>'ME', :description=>'YOU')
+	db.save!
 
-    def definition
-      query = "select * from bioentry_description where bioentry_id = ?"
-      row = @dbh.execute(query, @bioentry_id).fetch
-      row ? row['description'] : ''
+    puts "### FileFile.auto"
+    if ARGV.size > 0
+	#embl = Bio::FlatFile.auto(ARGF.read)
+	Bio::FlatFile.auto(ARGF) do |ff|
+		ff.each do |data|
+			biosequence=data.to_biosequence
+			puts biosequence.output(:fasta)
+			sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
+			sqlseq.save
+			sqlseq.to_biosequence.output(:fasta)
+		end
+	end
+    else
+	require 'bio/io/fetch'
+	server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
+	data = Bio::EMBL.new(server.fetch('embl','AJ224123'))
     end
 
-    def keyword
-      query = "select * from bioentry_keywords where bioentry_id = ?"
-      row = @dbh.execute(query, @bioentry_id).fetch
-      row ? row['keywords'] : ''
-    end
+	
+#	sqlseq = Bio::SQL::Sequence.new(:biosequence=>biosequence,:biodatabase_id=>db.biodatabase_id)
+#	sqlseq.save
+#	sqlseq_bioseq=sqlseq.to_biosequence	
+#	puts sqlseq_bioseq.output(:genbank)
 
-    # Use lineage, common_name, ncbi_taxa_id methods to extract in detail.
-    def taxonomy
-      query = <<-END
-        select taxon_name.name, taxon.ncbi_taxon_id from bioentry
-        join taxon_name using(taxon_id) join taxon using (taxon_id)
-        where bioentry_id = ?
-      END
-      row = @dbh.execute(query, @bioentry_id).fetch
-#     @lineage = row ? row['full_lineage'] : ''
-      @common_name = row ? row['name'] : ''
-      @ncbi_taxa_id = row ? row['ncbi_taxon_id'] : ''
-      row ? [@lineage, @common_name, @ncbi_taxa_id] : []
-    end
 
-    def lineage
-      taxonomy unless @lineage
-      return @lineage
-    end
 
-    def common_name
-      taxonomy unless @common_name
-      return @common_name
-    end
-
-    def ncbi_taxa_id
-      taxonomy unless @ncbi_taxa_id
-      return @ncbi_taxa_id
-    end
-
-
-    private
-
-    def feature_key(k_id)
-      query = "select * from term where term_id= ?"
-      row = @dbh.execute(query, k_id).fetch
-      row ? row['name'] : ''
-    end
-
-    def feature_source(s_id)
-      query = "select * from term where term_id = ?"
-      row = @dbh.execute(query, s_id).fetch
-      row ? row['name'] : ''
-    end
-
-    def feature_locations(f_id)
-      locations = []
-      query = "select * from location where seqfeature_id = ?"
-      @dbh.execute(query, f_id).fetch_all.each do |row|
-        next unless row
-
-        location = Bio::Location.new
-        location.strand = row['strand']
-        location.from = row['start_pos']
-        location.to = row['end_pos']
-
-        xref = feature_locations_remote(row['dbxref_if'])
-        location.xref_id = xref.shift unless xref.empty?
-
-        # just omit fuzzy location for now...
-        #feature_locations_qv(row['seqfeature_location_id'])
-
-        rank = row['rank'].to_i - 1
-        locations[rank] = location
-      end
-      return Bio::Locations.new(locations)
-    end
-
-    def feature_locations_remote(l_id)
-      query = "select * from  dbxref where dbxref_id = ?"
-      row = @dbh.execute(query, l_id).fetch
-      row ? [row['accession'], row['version']] : []
-    end
-
-    def feature_locations_qv(l_id)
-      query = "select * from location_qualifier_value where location_id = ?"
-      row = @dbh.execute(query, l_id).fetch
-      row ? [row['value'], row['int_value']] : []
-    end
-
-    def feature_qualifiers(f_id)
-      qualifiers = []
-      query = "select * from seqfeature_qualifier_value where seqfeature_id = ?"
-      @dbh.execute(query, f_id).fetch_all.each do |row|
-        next unless row
-
-        key = feature_qualifiers_key(row['seqfeature_id'])
-        value = row['value']
-        qualifier = Bio::Feature::Qualifier.new(key, value)
-
-        rank = row['rank'].to_i - 1
-        qualifiers[rank] = qualifier
-      end
-      return qualifiers.compact	# .compact is nasty hack for a while
-    end
-
-    def feature_qualifiers_key(q_id)
-      query = <<-END
-        select * from seqfeature_qualifier_value
-        join term using(term_id) where seqfeature_id = ?
-      END
-      row = @dbh.execute(query, q_id).fetch
-      row ? row['name'] : ''
-    end
+	#    bioseq = Bio::SQL.fetch_accession('AJ224122')   
+	#    pp bioseq
+	#    pp bioseq.entry_id    
+    #TODO create a test only for tables not sequence here
+#    pp bioseq.molecule_type
+    #pp  bioseq.molecule_type.class
+    #bioseq.molecule_type_update('dna', 1)
+##    pp Bio::SQL::Taxon.find(8121).taxon_names
+    
+	    #sqlseq.to_biosequence
+	
+#	sqlseq.delete
+	
+#	db.destroy
   end
-
-end # SQL
-
-end # Bio
-
-
-if __FILE__ == $0
-  begin
-    require 'pp'
-    alias p pp
-  rescue LoadError
-  end
-
-  db = ARGV.empty? ? 'dbi:Mysql:database=biosql;host=localhost' : ARGV.shift
-  serv = Bio::SQL.new(db, 'root')
-
-  ent0 = serv.fetch('X76706')
-  ent0 = serv.fetch('A15H9FIB')
-  ent1 = serv.fetch('J01902')
-  ent2 = serv.fetch('X04311')
-
-  pp ent0.features
-  pp ent0.references
-
-  pp ent1.seq
-  pp ent1.seq.translate
-  pp ent1.seq.gc
-  pp ent1.subseq(1,20)
-
-  pp ent2.accession
-  pp ent2.comment
-  pp ent2.comments
-  pp ent2.common_name
-  pp ent2.database
-  pp ent2.date
-  pp ent2.dblink
-  pp ent2.definition
-  pp ent2.division
-  pp ent2.entry_id
-  pp ent2.features
-  pp ent2.keyword
-  pp ent2.lineage
-  pp ent2.ncbi_taxa_id
-  pp ent2.references
-  pp ent2.seq
-  pp ent2.subseq(1,10)
-  pp ent2.taxonomy
-  pp ent2.version
-
+  #pp  bioseq.molecule_type
+  #term = Bio::SQL::Term.find_by_name('mol_type')
+  #pp term
+  #pp bioseq.entry.bioentry_qualifier_values.create(:term=>term, :rank=>2, :value=>'pippo')
+  #pp bioseq.entry.bioentry_qualifier_values.inspect
+  #pp bioseq.entry.bioentry_qualifier_values.find_all_by_term_id(26)
+  #pp primo.class
+  #  pp primo.value='dna'
+  #  pp primo.save
+  #pp bioseq.molecule_type= 'prova'
+  
+  #Bio::SQL::BioentryQualifierValue.delete(delete.bioentry_id,delete.term_id,delete.rank)
+  
+  
 end
-