lib/rbbt/sources/uniprot.rb in rbbt-sources-2.0.2 vs lib/rbbt/sources/uniprot.rb in rbbt-sources-2.1.0
- old
+ new
@@ -1,5 +1,6 @@
+require 'rbbt'
require 'rbbt/util/open'
require 'rbbt/resource'
require 'rbbt/sources/cath'
require 'rbbt/sources/uniprot'
@@ -31,10 +32,11 @@
tsv.to_s
end
# URL template for the `.txt` rendering of a UniProt entry; the literal
# "[PROTEIN]" placeholder is replaced with the requested identifier before
# the page is fetched with Open.read.
UNIPROT_TEXT="http://www.uniprot.org/uniprot/[PROTEIN].txt"
# URL template for the `.fasta` rendering of a UniProt entry; uses the same
# "[PROTEIN]" placeholder convention as UNIPROT_TEXT.
+ UNIPROT_FASTA="http://www.uniprot.org/uniprot/[PROTEIN].fasta"
# Collects the PDB cross-references listed in the UniProt text entry of a
# protein.
#
# The entry is fetched from UNIPROT_TEXT (with "[PROTEIN]" replaced by
# +protein+) and every line of the form "DR   PDB; id; method; resolution;
# region." is parsed.
#
# protein - identifier substituted into the UNIPROT_TEXT URL template.
#
# Returns a Hash keyed by the downcased PDB id. Each value is a Hash with
# :method, :resolution, :region (a Range of Integers, always ascending) and
# :chains. Lines whose region field cannot be parsed are reported through
# Log.warn and skipped.
def self.pdbs(protein)
  url = UNIPROT_TEXT.sub "[PROTEIN]", protein
  text = Open.read(url)

  pdb = {}

  text.split(/\n/).each{|l|
    next unless l =~ /^DR\s+PDB; (.*)\./
    id, method, resolution, region = $1.split(";").collect{|v| v.strip}

    # The region field looks like "A=324-358"; entries without coordinates
    # (or with fewer than four fields) cannot be turned into a range.
    match = region && region.match(/(\w+)=(\d+)-(\d+)/)
    unless match
      # Report the offending line itself. The block variable is +l+; the
      # previous code interpolated an undefined +line+, which raised
      # NameError from inside the error handler instead of skipping.
      Log.warn("Error process Uniprot PDB line: #{l}")
      next
    end

    chains, start, eend = match.values_at(1, 2, 3)
    start = start.to_i
    eend = eend.to_i
    # Some entries list the region backwards; normalise to ascending order
    # so the Range below is never empty.
    start, eend = eend, start if start > eend

    pdb[id.downcase] = {:method => method, :resolution => resolution, :region => (start..eend), :chains => chains}
  }

  pdb
end
+
# Fetches the amino-acid sequence of a protein from its UniProt FASTA page.
#
# protein - identifier substituted into the UNIPROT_FASTA URL template.
#
# Returns the sequence as a single String: every line that does not start
# with ">" (the FASTA header marker), concatenated in order.
def self.sequence(protein)
  url = UNIPROT_FASTA.sub "[PROTEIN]", protein
  text = Open.read(url)

  residue_lines = text.split(/\n/).reject{|line| line =~ /^>/}

  # Strip each line so a CRLF-terminated response (or stray padding) does not
  # leave "\r" or blanks embedded in the returned sequence.
  residue_lines.collect{|line| line.strip}.join
end
+
# Parses the feature table (the "FT" lines) of a UniProt text entry.
#
# The entry is fetched from UNIPROT_TEXT (with "[PROTEIN]" replaced by
# +protein+). Only lines starting with "FT" are kept; they are split into
# alternating feature-key headers and value chunks, and each value chunk is
# expected to read "<start> <end> [description]" once continuation markers
# and repeated whitespace are collapsed.
#
# protein - identifier substituted into the UNIPROT_TEXT URL template.
#
# Returns an Array of Hashes with the keys :type (feature key), :start and
# :end (Integers) and :description (String, or nil when the feature has
# none). Value chunks that do not contain two positions are reported through
# Log.debug and skipped.
def self.features(protein)
  url = UNIPROT_TEXT.sub "[PROTEIN]", protein
  text = Open.read(url)

  ft_text = text.split(/\n/).select{|line| line =~ /^FT/} * "\n"

  # Splitting on a captured pattern keeps the headers in the result, so the
  # array alternates header, value, header, value... The first element is
  # whatever precedes the first header and is discarded.
  # NOTE(review): this pattern requires exactly one space between "FT" and
  # the feature key, while the type extraction below tolerates any amount
  # (\s+). Whitespace in this listing may have been collapsed; confirm the
  # spacing against the flat-file layout before relying on it.
  parts = ft_text.split(/^(FT \w+)/)
  parts.shift

  features = []

  type = nil
  parts.each do |part|
    # Header chunk: remember the feature key for the value that follows.
    if part[0..1] == "FT"
      type = part.gsub(/FT\s+/, '')
      next
    end

    # Drop the "FT" markers of continuation lines and normalise whitespace.
    # Stripping makes a description-less value fall through to the second
    # pattern (description nil) and keeps descriptions free of trailing
    # blanks.
    value = part.gsub("\nFT", '').gsub(/\s+/, ' ').strip

    case value
    when /(\d+) (\d+) (.*)/
      start, eend, description = $1, $2, $3
    when /(\d+) (\d+)/
      start, eend, description = $1, $2, nil
    else
      Log.debug "Value not understood: #{ value }"
      # Without positions there is nothing meaningful to record; falling
      # through would append a feature with start and end of 0.
      next
    end

    features << {
      :type => type,
      :start => start.to_i,
      :end => eend.to_i,
      :description => description,
    }
  end

  features
end
+
def self.variants(protein)
url = UNIPROT_TEXT.sub "[PROTEIN]", protein
text = Open.read(url)