lib/rbbt/sources/uniprot.rb in rbbt-sources-1.2.0 vs lib/rbbt/sources/uniprot.rb in rbbt-sources-2.0.0
- old
+ new
@@ -1,38 +1,36 @@
require 'rbbt/util/open'
require 'rbbt/resource'
require 'rbbt/sources/cath'
require 'rbbt/sources/uniprot'
-module Uniprot
+module UniProt
extend Resource
- self.subdir = "share/databases/Uniprot"
+ self.subdir = "share/databases/UniProt"
# Resource claim for `annotated_variants` (old "-" and new "+" lines shown together).
# Both versions download humsavar.txt from uniprot.org and parse it into a TSV.
#   Old: keyed by "Associated Gene Name" (:type => :list), then re-keyed by an
#        added "Mutated Isoform" field built as "<accession>:<mutation>".
#   New: drops the first column, keys by "UniProt/SwissProt Accession" and sets
#        :type => :double with :merge => true; no "Mutated Isoform" field is added.
- Uniprot.claim Uniprot.annotated_variants, :proc do
+ UniProt.claim UniProt.annotated_variants, :proc do
url = "http://www.uniprot.org/docs/humsavar.txt"
# Shell filter: start output at line 31, drop the last 4 lines, and keep only
# lines containing an alphabetic character.
# NOTE(review): the 30/4 offsets presumably match the file's header/footer —
# confirm against the current humsavar.txt layout.
tsv = TSV.open(CMD.cmd('tail -n +31 | head -n -4|grep "[[:alpha:]]"', :in => Open.open(url), :pipe => true),
# :fix splits each line on whitespace; everything from the 7th token on is
# re-joined with spaces so the trailing column stays a single field.
- :fix => Proc.new{|line| parts = line.split(/\s+/); (parts[0..5] + [(parts[6..-1] || []) * " "]) * "\t"}, :type => :list,:key_field => "Associated Gene Name",
- :fields => ["Uniprot/SwissProt Accession", "Uniprot Variant ID", "Amino Acid Mutation", "Type of Variant", "SNP ID", "Disease"])
# New version starts at parts[1], so the first token (the old key column) is discarded.
+ :fix => Proc.new{|line| parts = line.split(/\s+/); (parts[1..5] + [(parts[6..-1] || []) * " "]) * "\t"},
+ :type => :double,
+ :merge => true,
+ :key_field => "UniProt/SwissProt Accession",
+ :fields => ["UniProt Variant ID", "Amino Acid Mutation", "Type of Variant", "SNP ID", "Disease"])
tsv.unnamed = true
# Rewrites mutations of the form "p.Xxx123Yyy" as "<wt><position><mut>", looking
# up each three-letter code (lower-cased) in Misc::THREE_TO_ONE_AA_CODE;
# values that do not match the pattern are returned unchanged.
- tsv.process "Amino Acid Mutation" do |mutation|
- if mutation.match(/p\.(\w{3})(\d+)(\w{3})/)
- wt = Misc::THREE_TO_ONE_AA_CODE[$1.downcase]
- mut = Misc::THREE_TO_ONE_AA_CODE[$3.downcase]
- [wt, $2, mut] * ""
- else
- mutation
# New version receives a collection per field and maps over it
# (presumably because :double stores a list of values per field — TODO confirm).
+ tsv.process "Amino Acid Mutation" do |mutations|
+ mutations.collect do |mutation|
+ if mutation.match(/p\.(\w{3})(\d+)(\w{3})/)
+ wt = Misc::THREE_TO_ONE_AA_CODE[$1.downcase]
+ mut = Misc::THREE_TO_ONE_AA_CODE[$3.downcase]
+ [wt, $2, mut] * ""
+ else
+ mutation
+ end
end
end
-
- uniprot_pos = tsv.identify_field "Uniprot/SwissProt Accession"
- mutation_pos = tsv.identify_field "Amino Acid Mutation"
- tsv.add_field "Mutated Isoform" do |key, values|
- [values[uniprot_pos], values[mutation_pos]] * ":"
- end
-
- tsv.reorder("Mutated Isoform").to_s
# The block's value is the serialized TSV.
+ tsv.to_s
end
# URL template for a protein's UniProt text entry; the literal "[PROTEIN]"
# placeholder is replaced with the protein identifier via String#sub before fetching.
UNIPROT_TEXT="http://www.uniprot.org/uniprot/[PROTEIN].txt"
def self.pdbs(protein)
@@ -42,11 +40,16 @@
pdb = {}
text.split(/\n/).each{|l|
next unless l =~ /^DR\s+PDB; (.*)\./
id, method, resolution, region = $1.split(";").collect{|v| v.strip}
- chains, start, eend = region.match(/(\w+)=(\d+)-(\d+)/).values_at(1,2,3)
+ begin
+ chains, start, eend = region.match(/(\w+)=(\d+)-(\d+)/).values_at(1,2,3)
+ rescue
+ Log.warn("Error process Uniprot PDB line: #{line}")
+ next
+ end
pdb[id.downcase] = {:method => method, :resolution => resolution, :region => (start.to_i..eend.to_i), :chains => chains}
}
pdb
end
@@ -94,11 +97,10 @@
end
variants
end
-
def self.cath(protein)
url = UNIPROT_TEXT.sub "[PROTEIN]", protein
text = Open.read(url)
cath = {}
@@ -116,10 +118,10 @@
Cath.domains_for_pdb(pdb)
end.flatten.compact
end
# Selects the entries returned by `pdbs(protein)` whose :region includes
# aa_position.
#
# protein     - identifier passed straight through to `pdbs`.
# aa_position - position tested with `include?` against each entry's :region.
#               NOTE(review): :region is built from integers (start.to_i..eend.to_i),
#               so this presumably needs to be an Integer — confirm callers convert.
#
# Returns the result of `select` over the `pdbs` result.
def self.pdbs_covering_aa_position(protein, aa_position)
- Uniprot.pdbs(protein).select do |pdb, info|
+ UniProt.pdbs(protein).select do |pdb, info|
info[:region].include? aa_position
end
end
end