lib/rbbt/sources/uniprot.rb in rbbt-sources-2.1.5 vs lib/rbbt/sources/uniprot.rb in rbbt-sources-2.1.7
- old
+ new
@@ -1,7 +1,8 @@
-require 'rbbt'
+require 'rbbt-util'
require 'rbbt/util/open'
+require 'rbbt/util/filecache'
require 'rbbt/resource'
require 'rbbt/sources/cath'
require 'rbbt/sources/uniprot'
module UniProt
@@ -30,16 +31,82 @@
end
end
tsv.to_s
end
-
UNIPROT_TEXT="http://www.uniprot.org/uniprot/[PROTEIN].txt" # URL template for an entry's text record; users of the constant substitute an id for [PROTEIN]
UNIPROT_FASTA="http://www.uniprot.org/uniprot/[PROTEIN].fasta" # URL template for an entry's FASTA record; same [PROTEIN] placeholder
+
+ def self.get_uniprot_entry(uniprotids) # Returns UniProt text entries: a Hash of id => text when given an Array, a single text otherwise
+ _array = Array === uniprotids # remember the caller's argument shape; it selects the return shape at the end
+
+ uniprotids = [uniprotids] unless Array === uniprotids # wrap a lone id so the rest of the method handles one shape
+ uniprotids = uniprotids.compact.collect{|id| id} # drop nil ids; the identity collect adds nothing beyond the copy compact already made
+
+ result_files = FileCache.cache_online_elements(uniprotids, 'uniprot-{ID}.xml') do |ids| # NOTE(review): presumably yields the ids missing from the cache and returns id => cached file -- confirm in FileCache; the pattern ends in .xml although the content comes from the .txt URL
+ result = {} # id => downloaded text; this hash is the block's return value
+ ids.each do |id|
+ begin
+ Misc.try3times do # NOTE(review): by its name this retries the download up to three times -- confirm in Misc
+
+ content = Open.read(UNIPROT_TEXT.sub("[PROTEIN]", id), :wget_options => {:quiet => true}, :nocache => true) # entry URL = template with the id substituted for [PROTEIN]
+
+ result[id] = content
+ end
+ rescue
+ Log.error $!.message # a download that still fails is logged and skipped, so this id gets no entry in result
+ end
+ end
+ result
+ end
+
+ uniprots = {}
+ uniprotids.each{|id| uniprots[id] = Open.read(result_files[id]) } # read every requested id back through result_files; NOTE(review): unclear what result_files[id] holds for an id whose download failed -- verify
+
+ if _array
+ uniprots # Array in => Hash of id => entry text out
+ else
+ uniprots.values.first # single id in => its entry text; nil when uniprots is empty (e.g. the id was nil)
+ end
+ end
+
+ def self.get_uniprot_sequence(uniprotids) # Returns protein sequences: a Hash of id => sequence when given an Array, a single sequence otherwise
+ _array = Array === uniprotids # remember the caller's argument shape; it selects the return shape at the end
+
+ uniprotids = [uniprotids] unless Array === uniprotids # wrap a lone id so the rest of the method handles one shape
+ uniprotids = uniprotids.compact.collect{|id| id} # drop nil ids; the identity collect adds nothing beyond the copy compact already made
+
+ result_files = FileCache.cache_online_elements(uniprotids, 'uniprot-sequence-{ID}') do |ids| # NOTE(review): presumably yields the ids missing from the cache and returns id => cached file -- confirm in FileCache
+ result = {} # id => sequence string; this hash is the block's return value
+ ids.each do |id|
+ begin
+ Misc.try3times do # NOTE(review): by its name this retries the download up to three times -- confirm in Misc
+
+ url = UNIPROT_FASTA.sub "[PROTEIN]", id # FASTA URL = template with the id substituted for [PROTEIN]
+ text = Open.read(url, :nocache => true)
+
+ result[id] = text.split(/\n/).select{|line| line !~ /^>/} * "" # drop the ">" header lines and join the remaining lines into one string
+ end
+ rescue
+ Log.error $!.message # a download that still fails is logged and skipped, so this id gets no entry in result
+ end
+ end
+ result
+ end
+
+ uniprots = {}
+ uniprotids.each{|id| uniprots[id] = Open.read(result_files[id]) } # read every requested id back through result_files; NOTE(review): unclear what result_files[id] holds for an id whose download failed -- verify
+
+ if _array
+ uniprots # Array in => Hash of id => sequence out
+ else
+ uniprots.values.first # single id in => its sequence; nil when uniprots is empty (e.g. the id was nil)
+ end
+ end
+
def self.pdbs(protein)
- url = UNIPROT_TEXT.sub "[PROTEIN]", protein
- text = Open.read(url)
+ text = get_uniprot_entry(protein)
pdb = {}
text.split(/\n/).each{|l|
next unless l =~ /^DR\s+PDB; (.*)\./
@@ -57,30 +124,25 @@
}
pdb
end
def self.sequence(protein) # Returns the sequence for protein as a String with the FASTA header lines removed
- url = UNIPROT_FASTA.sub "[PROTEIN]", protein
- text = Open.read(url)
-
- text.split(/\n/).select{|line| line !~ /^>/} * ""
+ get_uniprot_sequence(protein) # the new version delegates to get_uniprot_sequence instead of downloading the FASTA record here
end
def self.features(protein)
- url = UNIPROT_TEXT.sub "[PROTEIN]", protein
- text = Open.read(url)
+ text = get_uniprot_entry(protein)
text = text.split(/\n/).select{|line| line =~ /^FT/} * "\n"
parts = text.split(/^(FT \w+)/)
parts.shift
features = []
type = nil
parts.each do |part|
- parts
if part[0..1] == "FT"
type = part.gsub(/FT\s+/,'')
next
end
value = part.gsub("\nFT", '').gsub(/\s+/, ' ')
@@ -109,12 +171,11 @@
features
end
def self.variants(protein)
- url = UNIPROT_TEXT.sub "[PROTEIN]", protein
- text = Open.read(url)
+ text = get_uniprot_entry(protein)
text = text.split(/\n/).select{|line| line =~ /^FT/} * "\n"
parts = text.split(/^(FT \w+)/)
parts.shift
@@ -155,11 +216,10 @@
variants
end
def self.cath(protein)
- url = UNIPROT_TEXT.sub "[PROTEIN]", protein
- text = Open.read(url)
+ text = get_uniprot_entry(protein)
cath = {}
text.split(/\n/).each{|l|
next unless l =~ /^DR\s+Gene3D; G3DSA:(.*)\./
id, description, cuantity = $1.split(";").collect{|v| v.strip}