lib/bio/io/fetch.rb in bio-1.4.3.0001 vs lib/bio/io/fetch.rb in bio-1.5.0
- old
+ new
@@ -3,28 +3,35 @@
#
# Copyright:: Copyright (C) 2002, 2005 Toshiaki Katayama <k@bioruby.org>,
# Copyright (C) 2006 Jan Aerts <jan.aerts@bbsrc.ac.uk>
# License:: The Ruby License
#
-# $Id:$
-#
# == DESCRIPTION
#
-# Using BioRuby BioFetch server
+# Using EBI Dbfetch server
#
-# br_server = Bio::Fetch.new()
-# puts br_server.databases
-# puts br_server.formats('embl')
-# puts br_server.maxids
-#
-# Using EBI BioFetch server
-#
-# ebi_server = Bio::Fetch.new('http://www.ebi.ac.uk/cgi-bin/dbfetch')
+# ebi_server = Bio::Fetch::EBI.new
+# puts ebi_server.fetch('embl', 'J00231')
# puts ebi_server.fetch('embl', 'J00231', 'raw')
# puts ebi_server.fetch('embl', 'J00231', 'html')
-# puts Bio::Fetch.query('genbank', 'J00231')
-# puts Bio::Fetch.query('genbank', 'J00231', 'raw', 'fasta')
+#
+# Getting metadata from EBI Dbfetch server
+#
+# puts ebi_server.databases
+# puts ebi_server.formats('embl')
+# puts ebi_server.maxids
+#
+# Using EBI Dbfetch server without creating a Bio::Fetch::EBI instance
+#
+# puts Bio::Fetch::EBI.query('ena_sequence', 'J00231')
+# puts Bio::Fetch::EBI.query('ena_sequence', 'J00231', 'raw', 'fasta')
+#
+# Using a BioFetch server by explicitly specifying its URL
+#
+# server = Bio::Fetch.new('http://www.ebi.ac.uk/Tools/dbfetch/dbfetch')
+# puts server.fetch('ena_sequence', 'J00231')
+# puts server.fetch('ena_sequence', 'J00231', 'raw', 'fasta')
#
require 'uri'
require 'cgi'
require 'bio/command'
@@ -35,48 +42,108 @@
# a database name and an accession number, these servers return the associated
# record. For example, for the embl database on the EBI, that would be a
# nucleic or amino acid sequence.
#
# Possible dbfetch servers include:
- # * http://bioruby.org/cgi-bin/biofetch.rb (default)
- # * http://www.ebi.ac.uk/cgi-bin/dbfetch
+ # * http://www.ebi.ac.uk/Tools/dbfetch/dbfetch
#
+ # Note that the old URL http://www.ebi.ac.uk/cgi-bin/dbfetch is still alive,
+ # probably for compatibility, but using the new URL is recommended.
+ #
+ # Historically, there were other dbfetch servers including:
+ # * http://bioruby.org/cgi-bin/biofetch.rb (default before BioRuby 1.4)
+ # But they are unavailable now.
+ #
+ #
# If you're behind a proxy server, be sure to set your HTTP_PROXY
# environment variable accordingly.
#
# = USAGE
# require 'bio'
- #
+ #
# # Retrieve the sequence of accession number M33388 from the EMBL
# # database.
- # server = Bio::Fetch.new() #uses default server
+ # server = Bio::Fetch::EBI.new #uses EBI server
+ # puts server.fetch('ena_sequence','M33388')
+ #
+ # # The database name "embl" can also be used, although it is not officially listed.
# puts server.fetch('embl','M33388')
- #
- # # Do the same thing without creating a Bio::Fetch object. This method always
- # # uses the default dbfetch server: http://bioruby.org/cgi-bin/biofetch.rb
- # puts Bio::Fetch.query('embl','M33388')
#
- # # To know what databases are available on the bioruby dbfetch server:
- # server = Bio::Fetch.new()
+ # # Do the same thing, explicitly giving the URL.
+ # server = Bio::Fetch.new(Bio::Fetch::EBI::URL) #uses EBI server
+ # puts server.fetch('ena_sequence','M33388')
+ #
+ # # Do the same thing without creating a Bio::Fetch::EBI object.
+ # puts Bio::Fetch::EBI.query('ena_sequence','M33388')
+ #
+ # # To know what databases are available on the dbfetch server:
+ # server = Bio::Fetch::EBI.new
# puts server.databases
#
# # Some databases provide their data in different formats (e.g. 'fasta',
# # 'genbank' or 'embl'). To check which formats are supported by a given
# # database:
# puts server.formats('embl')
#
class Fetch
+ # Bio::Fetch::EBI is a client of EBI Dbfetch
+ # (http://www.ebi.ac.uk/Tools/dbfetch/dbfetch).
+ #
+ # An instance of this class works the same as:
+ # obj = Bio::Fetch.new("http://www.ebi.ac.uk/Tools/dbfetch/dbfetch")
+ # obj.database = "ena_sequence"
+ #
+ # See the documentation of Bio::Fetch for more details.
+ class EBI < Fetch
+
+ # EBI Dbfetch server URL
+ URL = "http://www.ebi.ac.uk/Tools/dbfetch/dbfetch".freeze
+
+ # For usage, see the documentation of Bio::Fetch.new.
+ def initialize(url = URL)
+ @database = "ena_sequence"
+ super
+ end
+
+ # Shortcut for using EBI Dbfetch server. You can fetch an entry
+ # without creating an instance of Bio::Fetch::EBI. This method uses
+ # EBI Dbfetch server http://www.ebi.ac.uk/Tools/dbfetch/dbfetch .
+ #
+ # Example:
+ # puts Bio::Fetch::EBI.query('refseq','NM_123456')
+ # puts Bio::Fetch::EBI.query('ena_sequence','J00231')
+ #
+ # ---
+ # *Arguments*:
+ # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
+ # * _id_: single ID or ID list separated by commas or white space
+ # * _style_: [raw|html] (default = 'raw')
+ # * _format_: name of output format (see Bio::Fetch#formats)
+ def self.query(*args)
+ self.new.fetch(*args)
+ end
+ end #class EBI
+
# Create a new Bio::Fetch server object that can subsequently be queried
- # using the Bio::Fetch#fetch method
+ # using the Bio::Fetch#fetch method.
+ #
+ # You must specify _url_ of a server.
+ # The preset default server is deprecated.
+ #
+ # If you want to use a server without explicitly specifying the URL,
+ # use Bio::Fetch::EBI.new that uses EBI Dbfetch server.
+ #
# ---
# *Arguments*:
- # * _url_: URL of dbfetch server (default = 'http://bioruby.org/cgi-bin/biofetch.rb')
+ # * _url_: URL of dbfetch server. (no default value)
# *Returns*:: Bio::Fetch object
- def initialize(url = 'http://bioruby.org/cgi-bin/biofetch.rb')
+ def initialize(url = nil)
+ unless url then
+ raise ArgumentError, "No server URL is given in Bio::Fetch.new. The default server URL value have been deprecated. You must explicitly specify the url or use Bio::Fetch::EBI for using EBI Dbfetch."
+ end
@url = url
- schema, user, @host, @port, reg, @path, = URI.split(@url)
end
# The default database to query
#--
# This will be used by the get_by_id method
@@ -88,12 +155,12 @@
def get_by_id(id)
  # Delegate to #fetch, supplying the instance's default database (@database)
  # together with the requested entry id.
  default_db = @database
  fetch(default_db, id)
end
# Fetch a database entry as specified by database (db), entry id (id),
- # 'raw' text or 'html' (style), and format. When using BioRuby's
- # BioFetch server, value for the format should not be set.
+ # 'raw' text or 'html' (style), and format.
+ #
# Examples:
# server = Bio::Fetch.new('http://www.ebi.ac.uk/Tools/dbfetch/dbfetch')
# puts server.fetch('embl','M33388','raw','fasta')
# puts server.fetch('refseq','NM_12345','html','embl')
# ---
@@ -109,49 +176,33 @@
query.push([ 'format', format ]) if format
_get(query)
end
- # Shortcut for using BioRuby's BioFetch server. You can fetch an entry
- # without creating an instance of BioFetch server. This method uses the
- # default dbfetch server, which is http://bioruby.org/cgi-bin/biofetch.rb
- #
- # Example:
- # puts Bio::Fetch.query('refseq','NM_12345')
- #
- # ---
- # *Arguments*:
- # * _database_: name of database to query (see Bio::Fetch#databases to get list of supported databases)
- # * _id_: single ID or ID list separated by commas or white space
- # * _style_: [raw|html] (default = 'raw')
- # * _format_: name of output format (see Bio::Fetch#formats)
- def self.query(*args)
- self.new.fetch(*args)
- end
-
# Using this method, the user can ask a dbfetch server what databases
# it supports. This would normally be the first step you'd take when
# you use a dbfetch server for the first time.
# Example:
# server = Bio::Fetch::EBI.new
# puts server.databases # returns "aa aax bl cpd dgenes dr ec eg emb ..."
#
- # This method only works for the bioruby dbfetch server. For a list
- # of databases available from the EBI, see the EBI website at
- # http://www.ebi.ac.uk/cgi-bin/dbfetch/
+ # This method works for EBI Dbfetch server (and for the bioruby dbfetch
+ # server). Not all servers support this method.
# ---
# *Returns*:: array of database names
def databases
  # Request the 'info' => 'dbs' value via _get_single, then trim the reply
  # and break it on runs of whitespace into an array of names.
  reply = _get_single('info', 'dbs')
  reply.strip.split(/\s+/)
end
# Lists the formats that are available for a given database. Like the
- # Bio::Fetch#databases method, this method is only available on
- # the bioruby dbfetch server.
+ # Bio::Fetch#databases method, not all servers support this method.
+ # This method is available on the EBI Dbfetch server (and on the bioruby
+ # dbfetch server).
+ #
# Example:
- # server = Bio::Fetch.new()
- # puts server.formats('embl') # returns "default fasta"
+ # server = Bio::Fetch::EBI.new()
+ # puts server.formats('embl') # returns [ "default", "annot", ... ]
# ---
# *Arguments*:
# * _database_:: name of database you want the supported formats for
# *Returns*:: array of formats
def formats(database = @database)
@@ -162,11 +213,16 @@
end
end
# A dbfetch server will only return entries up to a given maximum number.
# This method retrieves that number from the server. As for the databases
- # and formats methods, the maxids method only works for the bioruby
- # dbfetch server.
+ # and formats methods, not all servers support the maxids method.
+ # This method is available on the EBI Dbfetch server (and on the bioruby
+ # dbfetch server).
+ #
+ # Example:
+ # server = Bio::Fetch::EBI.new
+ # puts server.maxids # currently returns 200
# ---
# *Arguments*: none
# *Returns*:: number
def maxids
_get_single('info', 'maxids').to_i