Sha256: 28957f4e2e1693cbc656edc20d3bdcc6826bc4b1452f9354cfbda793253d47b3

Contents?: true

Size: 1.98 KB

Versions: 51

Compression:

Stored size: 1.98 KB

Contents

require 'rbbt-util'
require 'rbbt/resource'

# Resource definitions for the phospho.ELM database.
#
# Two files are claimed under share/databases/PhosphoELM:
#
# * `.source/dump.tgz` - the upstream dump, which the user must place by hand.
# * `data`             - a TSV stream keyed by "Phosphosite" with the fields
#                        "Kinases", "Source" and "PMID", built from that dump.
module PhosphoELM
  extend Resource
  self.subdir = 'share/databases/PhosphoELM'

  # Resolves the organism code used to look up identifiers and sequences.
  #
  # org - organism name handed to Organism.default_code (default: "Hsa").
  #
  # Returns the value of Organism.default_code(org).
  def self.organism(org="Hsa")
    # Required here rather than at the top of the file, so it is only loaded
    # when this method is actually called.
    require 'rbbt/sources/organism'
    Organism.default_code(org)
  end

  #self.search_paths = {}
  #self.search_paths[:default] = :lib

  # The dump is not downloaded automatically: producing this file always
  # raises, telling the user where to obtain it and where to place it.
  PhosphoELM.claim PhosphoELM[".source/dump.tgz"], :proc do |file|
    raise "Place phosphoELM_all_latest.dump.tgz from http://phospho.elm.eu.org at #{file}. Please consult license."
  end

  # Builds the phosphosite table from the dump.
  #
  # Each dump line mentioning "Homo sapiens" is mapped from its UniProt
  # accession to Ensembl Protein IDs; only the IDs whose stored sequence equals
  # the dump sequence are kept, and one entry is emitted per matching ID.
  #
  # Returns the dumper's stream.
  # Raises RuntimeError if the extracted archive contains no *.dump file.
  PhosphoELM.claim PhosphoELM.data, :proc do
    require 'shellwords'

    tgz = PhosphoELM[".source/dump.tgz"].produce.find

    organism = PhosphoELM.organism
    uni2ensp = Organism.identifiers(organism).tsv :key_field => "UniProt/SwissProt Accession", :fields => ["Ensembl Protein ID"], :type => :flat, :persist => true
    ensp2seq = Organism.protein_sequence(organism).tsv :persist => true

    dumper = TSV::Dumper.new(:key_field => "Phosphosite", :fields => ["Kinases", "Source", "PMID"], :type => :list)
    dumper.init
    TmpFile.with_file do |dir|
      Misc.in_dir dir do
        # The path is interpolated into a command line, so escape it: an
        # unescaped path containing spaces or shell metacharacters would break
        # the extraction. Paths without such characters are left unchanged.
        CMD.cmd("tar xvfz #{Shellwords.escape(tgz)}")

        f = Dir.glob("*.dump").first
        # Fail with a clear message instead of passing nil to Open.open.
        raise "No *.dump file found after extracting #{tgz}" if f.nil?

        TSV.traverse Open.open(f), :type => :array, :into => dumper do |line|
          next unless line =~ /Homo sapiens/

          # The last two columns are not used; they are named only to document
          # the column layout of the dump.
          acc, sequence, position, code, pmids, kinases, source, _species, _entry_date = line.split("\t")

          ensps = uni2ensp[acc]
          if ensps.nil?
            Log.warn "No Ensembl Protein ID for #{acc}"
            next
          end

          # The dump sequence is compared with a trailing "*" appended
          # (presumably the stored protein sequences end with a stop marker -
          # confirm against Organism.protein_sequence). A new string is built so
          # the parsed field itself is not modified.
          terminated_sequence = "#{sequence}*"
          good = ensps.select{|ensp| terminated_sequence == ensp2seq[ensp]}
          if good.empty?
            Log.warn "No sequence match for #{acc} - #{ensps*", "}"
            next
          end

          # One [key, values] pair per matching protein; the key has the form
          # "<Ensembl Protein ID>:<code><position>".
          res = good.map do |ensp|
            phospho_site = [ensp,":", code, position] * ""
            [phospho_site, [kinases, source, pmids]]
          end
          # NOTE(review): MultipleResult is defined elsewhere; presumably it
          # marks the array as several results rather than a single one.
          res.extend MultipleResult

          res
        end
      end
    end
    dumper.stream
  end
end

# Script entry point: only when this file is executed directly (not when it is
# required), produce PhosphoELM.data - passing `true` to `produce` - locate it
# with `find`, and hand the result to `iif`.
# NOTE(review): `iif` is not defined in this file; presumably a helper provided
# by rbbt-util - confirm.
if __FILE__ == $PROGRAM_NAME
  iif PhosphoELM.data.produce(true).find
end

Version data entries

51 entries across 51 versions & 1 rubygem

Version Path
rbbt-sources-3.4.2 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.4.1 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.3.0 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.2.16 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.2.15 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.2.14 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.2.13 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.2.12 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.2.11 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.2.10 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.2.9 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.2.8 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.2.6 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.2.4 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.2.3 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.2.2 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.2.1 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.2.0 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.1.52 lib/rbbt/sources/phospho_ELM.rb
rbbt-sources-3.1.51 lib/rbbt/sources/phospho_ELM.rb