lib/rbbt/sources/uniprot.rb in rbbt-sources-2.0.2 vs lib/rbbt/sources/uniprot.rb in rbbt-sources-2.1.0

- old
+ new

# NOTE(review): this span is a collapsed diff of lib/rbbt/sources/uniprot.rb
# (hunks @@ -1,5 +1,6 @@, @@ -31,10 +32,11 @@ and @@ -42,17 +44,72 @@).
# The code below is the "+ new" side of those hunks, written one statement per
# line. Pieces that are only partial context in the diff are kept as comments,
# because the code around them is not visible here.

# Context, hunk @@ -1,5 +1,6 @@ (file-level requires; 'rbbt' is the added one):
#   require 'rbbt'
#   require 'rbbt/util/open'
#   require 'rbbt/resource'
#   require 'rbbt/sources/cath'
#   require 'rbbt/sources/uniprot'

# Context, hunk @@ -31,10 +32,11 @@ (tail of a definition whose start is not shown):
#     tsv.to_s
#   end

# URL templates; "[PROTEIN]" is replaced with the protein identifier.
UNIPROT_TEXT = "http://www.uniprot.org/uniprot/[PROTEIN].txt"
UNIPROT_FASTA = "http://www.uniprot.org/uniprot/[PROTEIN].fasta"

# Collects the PDB cross-references ("DR   PDB; ..." lines) of a protein's
# UniProt text entry.
#
# protein - identifier substituted for "[PROTEIN]" in UNIPROT_TEXT.
#
# Returns a Hash keyed by the downcased PDB id. Each value is a Hash with
# :method, :resolution, :region (an Integer Range, always low..high) and
# :chains. Lines whose region field does not look like "CHAINS=start-end" are
# reported through Log.warn and skipped.
#
# NOTE(review): the diff omits one unchanged line between `pdb = {}` and the
# loop; it is assumed to be blank - confirm against the full file.
def self.pdbs(protein)
  url = UNIPROT_TEXT.sub "[PROTEIN]", protein
  text = Open.read(url)

  pdb = {}

  text.split(/\n/).each do |l|
    fields = l[/^DR\s+PDB; (.*)\./, 1]
    next unless fields

    id, exp_method, resolution, region = fields.split(";").map(&:strip)

    # Explicit check instead of rescuing the NoMethodError that a missing or
    # unparseable region used to trigger. The warning interpolates `l`; the
    # previous code referenced an undefined `line`, which raised NameError
    # from inside the rescue and aborted the whole call.
    match = region.to_s.match(/(\w+)=(\d+)-(\d+)/)
    unless match
      Log.warn("Error process Uniprot PDB line: #{l}")
      next
    end

    chains = match[1]
    # Some entries list the end before the start; normalize to low..high.
    start, eend = [match[2].to_i, match[3].to_i].minmax

    pdb[id.downcase] = {:method => exp_method, :resolution => resolution, :region => (start..eend), :chains => chains}
  end

  pdb
end

# Fetches the FASTA record of a protein and returns its residues as a single
# String: every line not starting with ">" is concatenated, headers dropped.
#
# protein - identifier substituted for "[PROTEIN]" in UNIPROT_FASTA.
def self.sequence(protein)
  url = UNIPROT_FASTA.sub "[PROTEIN]", protein
  text = Open.read(url)

  text.split(/\n/).reject { |line| line.start_with?(">") }.join
end

# Parses the feature table ("FT" lines) of a protein's UniProt text entry.
#
# protein - identifier substituted for "[PROTEIN]" in UNIPROT_TEXT.
#
# Returns an Array of Hashes with :type (feature key), :start and :end
# (Integers) and :description (String, or nil when nothing follows the two
# positions). Entries whose value does not contain two space-separated
# numbers are reported through Log.debug and skipped; previously they were
# still appended with :start and :end of 0 (nil.to_i).
def self.features(protein)
  url = UNIPROT_TEXT.sub "[PROTEIN]", protein
  text = Open.read(url)

  ft_text = text.split(/\n/).select { |line| line =~ /^FT/ } * "\n"

  # Split into alternating [key, value, key, value, ...]; the capture group
  # keeps the "FT <KEY>" tokens in the result. Whatever precedes the first
  # key is dropped by the shift.
  # NOTE(review): this view appears to have collapsed runs of whitespace, so
  # the number of spaces between "FT" and the key in the original pattern
  # cannot be confirmed (shown as one). One to three spaces are accepted so a
  # key is recognized either way - verify against the upstream file and the
  # UniProt flat-file layout.
  parts = ft_text.split(/^(FT {1,3}\w+)/)
  parts.shift

  features = []
  type = nil

  parts.each do |part|
    if part.start_with?("FT")
      type = part.gsub(/FT\s+/, '')
      next
    end

    # Join continuation lines and squeeze whitespace so the value is a single
    # line. After this no "FT" line prefix remains, so the former
    # `description.gsub(/^FT\s+/m, '')`, whose result was discarded, is dropped.
    value = part.gsub("\nFT", '').gsub(/\s+/, ' ')

    if (m = value.match(/(\d+) (\d+) (.*)/))
      start, eend, description = m[1], m[2], m[3]
    elsif (m = value.match(/(\d+) (\d+)/))
      start, eend, description = m[1], m[2], nil
    else
      Log.debug "Value not understood: #{ value }"
      next
    end

    features << {
      :type => type,
      :start => start.to_i,
      :end => eend.to_i,
      :description => description,
    }
  end

  features
end

# Context (head of the next definition; its body continues beyond this view):
#   def self.variants(protein)
#     url = UNIPROT_TEXT.sub "[PROTEIN]", protein
#     text = Open.read(url)