#!/usr/proj/bioruby/bin/ruby
#
# biofetch.rb : BioFetch server (interface to GenomeNet/DBGET via KEGG API)
#
#   Copyright (C) 2002-2004 KATAYAMA Toshiaki
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License
#  along with this program; if not, write to the Free Software
#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#  $Id: biofetch.rb,v 1.16 2005/08/07 10:02:41 k Exp $
#

require 'cgi'
require 'html/template'
require 'bio/io/keggapi'

# Maximum number of IDs accepted in one request.  Reported by "info=maxids",
# passed to the query page template, and enforced through error5.
MAX_ID_NUM = 50


# Plain-text error responses.  Each errorN method builds the "ERROR N ..."
# message and hands it to print_text_page, which terminates the script, so
# none of these methods return to the caller.
module BioFetchError

  # Prints +str+ as a text/plain (UTF-8) CGI response and exits.
  def print_text_page(str)
    print "Content-type: text/plain; charset=UTF-8\n\n"
    puts str
    exit
  end

  # Unknown database name.
  def error1(db)
    str = "ERROR 1 Unknown database [#{db}]."
    print_text_page(str)
  end

  # Unknown style.
  def error2(style)
    str = "ERROR 2 Unknown style [#{style}]."
    print_text_page(str)
  end

  # Format not available for the database.
  def error3(format, db)
    str = "ERROR 3 Format [#{format}] not known for database [#{db}]."
    print_text_page(str)
  end

  # Entry not found in the database.
  def error4(entry_id, db)
    str = "ERROR 4 ID [#{entry_id}] not found in database [#{db}]."
    print_text_page(str)
  end

  # More than MAX_ID_NUM IDs were requested.
  def error5(count)
    str = "ERROR 5 Too many IDs [#{count}]. Max [#{MAX_ID_NUM}] allowed."
    print_text_page(str)
  end

  # Unknown value of the "info" parameter.
  def error6(info)
    str = "ERROR 6 Illegal information request [#{info}]."
    print_text_page(str)
  end

end


# Thin wrapper around Bio::KEGG::API used by the request handlers below.
module KeggAPI

  include BioFetchError

  # Returns the entry_id of every database reported by the KEGG API.
  def list_databases
    serv = Bio::KEGG::API.new
    results = serv.list_databases
    results.map { |x| x.entry_id }
  end

  # Fetches every ID in +id_list+ from +db+, one get_entries call per ID,
  # and returns the results concatenated into a single String.  The first
  # ID that yields no result ends the request with ERROR 4.
  #
  # +format+ is accepted for interface compatibility but is not used here;
  # format conversion is done by the caller.
  def bget(db, id_list, format)
    serv = Bio::KEGG::API.new
    results = ''
    id_list.each do |query_id|
      entry_id = "#{db}:#{query_id}"
      result = serv.get_entries([entry_id])
      # A nil result is treated like an empty one rather than raising.
      if result.nil? || result.empty?
        error4(query_id, db)
      else
        results << result
      end
    end
    return results
  end

end


# Handles an entry retrieval request.  Constructing an instance validates
# the parameters, then either redirects to the DBGET HTML page or prints
# the entries as plain text; both paths terminate the script.
class BioFetch

  include BioFetchError
  include KeggAPI

  # Accepted values of the "style" and "format" parameters.  Compared by
  # exact match so that values merely containing one of these words
  # (e.g. "rawfoo") are rejected.
  STYLES  = %w[html raw].freeze
  FORMATS = %w[fasta default].freeze

  # Sequence accessors tried, in this order, when converting an entry to
  # FASTA; the first one the entry responds to is used.
  SEQUENCE_METHODS = [:seq, :aaseq, :naseq].freeze

  def initialize(db, id_list, style, format)
    check_style(style)
    check_format(format, db)
    check_number_of_id(id_list.length)
    check_dbname(db)

    # Does not return: prints a redirect and exits.
    goto_html_style_page(db, id_list, format) if style == 'html'

    entries = bget(db, id_list, format)
    entries = convert_to_fasta_format(entries, db) if format == 'fasta'

    print_text_page(entries)
  end

  private

  # Parses the flat-file text +str+ with Bio::FlatFile.auto and returns the
  # entries that expose a sequence as concatenated FASTA records (60
  # columns per line, header "db:entry_id definition").  Entries without a
  # sequence accessor, or whose accessor returns nil/false, are skipped.
  def convert_to_fasta_format(str, db)
    # Loaded lazily: only fasta requests need the full bio library.
    require 'bio'
    require 'stringio'

    fasta = []

    entries = StringIO.new(str)
    Bio::FlatFile.auto(entries) do |ff|
      ff.each do |entry|
        accessor = SEQUENCE_METHODS.find { |name| entry.respond_to?(name) }
        seq = accessor ? entry.send(accessor) : nil
        next unless seq

        entry_id   = entry.respond_to?(:entry_id)   ? entry.entry_id   : ''
        definition = entry.respond_to?(:definition) ? entry.definition : ''
        fasta << seq.to_fasta("#{db}:#{entry_id} #{definition}", 60)
      end
    end
    return fasta.join
  end

  # Redirects the client to the GenomeNet www_bget page for the requested
  # entries and exits.  IDs are joined with an encoded '+' (%2B); "-f+" is
  # prepended to the query when the fasta format was requested.
  def goto_html_style_page(db, id_list, format)
    url = "http://www.genome.jp/dbget-bin/www_bget"
    opt = format == 'fasta' ? '-f+' : ''
    ids = id_list.join('%2B')
    print "Location: #{url}?#{opt}#{db}+#{ids}\n\n"
    exit
  end

  # ERROR 2 unless +style+ is exactly one of STYLES.
  def check_style(style)
    error2(style) unless STYLES.include?(style)
  end

  # ERROR 3 when a format is given and is not exactly one of FORMATS.
  def check_format(format, db)
    error3(format, db) if format && !FORMATS.include?(format)
  end

  # ERROR 5 when more than MAX_ID_NUM IDs were requested.
  def check_number_of_id(num)
    error5(num) if num > MAX_ID_NUM
  end

  # ERROR 1 unless +db+ is among the names returned by list_databases.
  def check_dbname(db)
    error1(db) unless list_databases.include?(db)
  end

end


# Handles a server information request ("info=dbs|formats|maxids").
# Constructing an instance prints the answer as plain text and exits.
class BioFetchInfo

  include BioFetchError
  include KeggAPI

  # +info+ comes straight from the request, so it is dispatched through an
  # explicit list of supported names instead of being passed to send;
  # anything else is answered with ERROR 6.  Failures raised while
  # producing an answer (e.g. by the KEGG API) are not rescued here, so
  # they are no longer reported as an illegal information request.
  def initialize(info, db)
    @db = db
    case info
    when 'dbs'     then dbs
    when 'formats' then formats
    when 'maxids'  then maxids
    else                error6(info)
    end
  end

  private

  # Prints the sorted database names separated by single spaces.
  def dbs
    str = list_databases.sort.join(' ')
    print_text_page(str)
  end

  # Prints "default", followed by " fasta" when check_fasta_ok matches @db.
  def formats
    fasta = check_fasta_ok ? " fasta" : ""
    str = "default#{fasta}"
    print_text_page(str)
  end

  # Prints MAX_ID_NUM.
  def maxids
    str = MAX_ID_NUM.to_s
    print_text_page(str)
  end

  # Truthy (a MatchData) when @db contains one of the names below, nil
  # otherwise.  Note that this is a substring match, not an exact one.
  def check_fasta_ok
    # sequence databases supported by Bio::FlatFile.auto
    /genes|gb|genbank|genpept|rs|refseq|emb|sp|swiss|pir/.match(@db)
  end

end


# CGI front end: reads the request parameters and shows the query form,
# an information page, or the requested entries.
class BioFetchCGI

  def initialize(cgi)
    @cgi = cgi
    show_page
  end

  private

  # An "info" parameter takes precedence; otherwise the query form is shown
  # when no ID was given, and the entries are fetched when at least one was.
  def show_page
    if info.empty?
      if id_list.empty?
        show_query_page
      else
        show_result_page(db, id_list, style, format)
      end
    else
      show_info_page(info, db)
    end
  end

  # Renders the HTML template stored after __END__ with max_id_num set to
  # MAX_ID_NUM.
  def show_query_page
    html = HTML::Template.new
    html.set_html(DATA.read)
    html.param('max_id_num' => MAX_ID_NUM)
    @cgi.out do
      html.output
    end
  end

  def show_result_page(db, id_list, style, format)
    BioFetch.new(db, id_list, style, format)
  end

  def show_info_page(info, db)
    BioFetchInfo.new(info, db)
  end

  def info
    @cgi['info'].downcase
  end

  def db
    @cgi['db'].downcase
  end

  # IDs may be separated by any run of non-word characters (not only ',').
  # Empty strings left by a leading or repeated separator are dropped so
  # they are not looked up as IDs.
  def id_list
    @cgi['id'].split(/\W+/).reject { |id| id.empty? }
  end

  # Requested style, defaulting to "html" when the parameter is empty.
  def style
    s = @cgi['style'].downcase
    return s.empty? ? "html" : s
  end

  # Requested format, defaulting to "default" when the parameter is empty.
  def format
    f = @cgi['format'].downcase
    return f.empty? ? "default" : f
  end

end


BioFetchCGI.new(CGI.new)


=begin

This program was created during BioHackathon 2002, Tucson and updated
in Cape Town :)

Rewritten in 2004 to use KEGG API as the bioruby.org server left from Kyoto
University (where DBGET runs) and the old version could not run without
having internally accessible DBGET server.

=end


__END__
BioFetch interface to GenomeNet/DBGET

BioFetch interface to GenomeNet/DBGET

This page allows you to retrieve up to <!var:max_id_num> entries at a time from various up-to-date biological databases.



Direct access

http://bioruby.org/cgi-bin/biofetch.rb?format=(default|fasta|...);style=(html|raw);db=(genbank|embl|...);id=ID[,ID,ID,...]

(NOTE: the option separator ';' can be '&')

format (optional)
default|fasta|...
style (required)
html|raw
db (required)
genbank|refseq|embl|swissprot|pir|prf|pdb|pdbstr|epd|transfac|prosite|pmd|litdb|omim|ligand|pathway|brite|genes|genome|linkdb|aaindex|...
id (required)
comma separated list of IDs

See the BioFetch specification for more details.

Server information

What databases are available?
http://bioruby.org/cgi-bin/biofetch.rb?info=dbs
What formats does the database X have?
http://bioruby.org/cgi-bin/biofetch.rb?info=formats;db=embl
How many entries can be retrieved simultaneously?
http://bioruby.org/cgi-bin/biofetch.rb?info=maxids

Examples

gb:AJ617376 (default/raw)
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=AJ617376
gb:AJ617376 (fasta/raw)
http://bioruby.org/cgi-bin/biofetch.rb?format=fasta;style=raw;db=genbank;id=AJ617376
gb:AJ617376 (default/html)
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=html;db=genbank;id=AJ617376
gb:AJ617376,AJ617377 (default/raw, multiple)
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=AJ617376,AJ617377
embl:BUM (default/raw)
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=embl;id=BUM
sp:CYC_BOVIN (default/raw)
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=swissprot;id=CYC_BOVIN
sp:CYC_BOVIN (fasta/raw)
http://bioruby.org/cgi-bin/biofetch.rb?format=fasta;style=raw;db=swissprot;id=CYC_BOVIN
genes:b0015 (default/raw)
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genes;id=b0015
ps:PS00028 (default/raw)
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=prosite;id=PS00028

Errors

Error1 sample : DB not found
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=nonexistent;id=AJ617376
Error2 sample : unknown style
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=nonexistent;db=genbank;id=AJ617376
Error3 sample : unknown format
http://bioruby.org/cgi-bin/biofetch.rb?format=nonexistent;style=raw;db=genbank;id=AJ617376
Error4 sample : ID not found
http://bioruby.org/cgi-bin/biofetch.rb?format=default;style=raw;db=genbank;id=nonexistent
Error5 sample : too many IDs
http://bioruby.org/cgi-bin/biofetch.rb?style=raw;db=genes;id=1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51
Error6 sample : unknown info
http://bioruby.org/cgi-bin/biofetch.rb?info=nonexistent

Other BioFetch implementations


staff@BioRuby.org