lib/rbbt/sources/uniprot.rb in rbbt-sources-2.1.5 vs lib/rbbt/sources/uniprot.rb in rbbt-sources-2.1.7

- old
+ new

@@ -1,7 +1,8 @@ -require 'rbbt' +require 'rbbt-util' require 'rbbt/util/open' +require 'rbbt/util/filecache' require 'rbbt/resource' require 'rbbt/sources/cath' require 'rbbt/sources/uniprot' module UniProt @@ -30,16 +31,82 @@ end end tsv.to_s end - UNIPROT_TEXT="http://www.uniprot.org/uniprot/[PROTEIN].txt" UNIPROT_FASTA="http://www.uniprot.org/uniprot/[PROTEIN].fasta" + + def self.get_uniprot_entry(uniprotids) + _array = Array === uniprotids + + uniprotids = [uniprotids] unless Array === uniprotids + uniprotids = uniprotids.compact.collect{|id| id} + + result_files = FileCache.cache_online_elements(uniprotids, 'uniprot-{ID}.xml') do |ids| + result = {} + ids.each do |id| + begin + Misc.try3times do + + content = Open.read(UNIPROT_TEXT.sub("[PROTEIN]", id), :wget_options => {:quiet => true}, :nocache => true) + + result[id] = content + end + rescue + Log.error $!.message + end + end + result + end + + uniprots = {} + uniprotids.each{|id| uniprots[id] = Open.read(result_files[id]) } + + if _array + uniprots + else + uniprots.values.first + end + end + + def self.get_uniprot_sequence(uniprotids) + _array = Array === uniprotids + + uniprotids = [uniprotids] unless Array === uniprotids + uniprotids = uniprotids.compact.collect{|id| id} + + result_files = FileCache.cache_online_elements(uniprotids, 'uniprot-sequence-{ID}') do |ids| + result = {} + ids.each do |id| + begin + Misc.try3times do + + url = UNIPROT_FASTA.sub "[PROTEIN]", id + text = Open.read(url, :nocache => true) + + result[id] = text.split(/\n/).select{|line| line !~ /^>/} * "" + end + rescue + Log.error $!.message + end + end + result + end + + uniprots = {} + uniprotids.each{|id| uniprots[id] = Open.read(result_files[id]) } + + if _array + uniprots + else + uniprots.values.first + end + end + def self.pdbs(protein) - url = UNIPROT_TEXT.sub "[PROTEIN]", protein - text = Open.read(url) + text = get_uniprot_entry(protein) pdb = {} text.split(/\n/).each{|l| next unless l =~ /^DR\s+PDB; (.*)\./ @@ -57,30 +124,25 @@ } pdb end def self.sequence(protein) - url = UNIPROT_FASTA.sub "[PROTEIN]", protein - text = Open.read(url) - - text.split(/\n/).select{|line| line !~ /^>/} * "" + get_uniprot_sequence(protein) end def self.features(protein) - url = UNIPROT_TEXT.sub "[PROTEIN]", protein - text = Open.read(url) + text = get_uniprot_entry(protein) text = text.split(/\n/).select{|line| line =~ /^FT/} * "\n" parts = text.split(/^(FT \w+)/) parts.shift features = [] type = nil parts.each do |part| - parts if part[0..1] == "FT" type = part.gsub(/FT\s+/,'') next end value = part.gsub("\nFT", '').gsub(/\s+/, ' ') @@ -109,12 +171,11 @@ features end def self.variants(protein) - url = UNIPROT_TEXT.sub "[PROTEIN]", protein - text = Open.read(url) + text = get_uniprot_entry(protein) text = text.split(/\n/).select{|line| line =~ /^FT/} * "\n" parts = text.split(/^(FT \w+)/) parts.shift @@ -155,11 +216,10 @@ variants end def self.cath(protein) - url = UNIPROT_TEXT.sub "[PROTEIN]", protein - text = Open.read(url) + text = get_uniprot_entry(protein) cath = {} text.split(/\n/).each{|l| next unless l =~ /^DR\s+Gene3D; G3DSA:(.*)\./ id, description, cuantity = $1.split(";").collect{|v| v.strip}