require 'rbbt-util'
require 'rbbt/sources/organism'

# Resource definitions and memoised lookup indices built from the InterPro
# `protein2ipr` dump.
module InterPro
  extend Resource
  self.subdir = "share/databases/InterPro"

  # Raw dump that every other resource in this module is derived from.
  InterPro.claim InterPro.source.protein2ipr, :url, "ftp://ftp.ebi.ac.uk/pub/databases/interpro/protein2ipr.dat.gz"

  # Produces the table labelled "UniProt/SwissProt Accession" ->
  # "InterPro ID", "Domain Start AA", "Domain End AA", restricted to the
  # accessions listed in the protein identifiers of the hard-coded organism.
  InterPro.claim InterPro.protein_domains, :proc do
    organism = "Hsa"
    uniprot_field = "UniProt/SwissProt Accession"

    # Locate the accession column by name so the right column can be cut below.
    uniprot_column = TSV::Parser.new(Organism.protein_identifiers(organism).open).all_fields.index(uniprot_field)
    raise "Field '#{uniprot_field}' not found in the protein identifiers of #{organism}" if uniprot_column.nil?

    # `cut` counts columns from 1, hence the +1. Cells may hold several
    # accessions separated by "|", so they are split and flattened.
    uniprots = CMD.cmd("grep -v  '^#'|cut -f #{uniprot_column + 1}", :in => Organism.protein_identifiers(organism).open).
      read.split("\n").collect{|l| l.split("|")}.flatten.uniq.reject{|l| l.empty?}

    tsv = nil
    TmpFile.with_file(uniprots * "\n") do |tmpfile|
      # Filter by accession BEFORE sorting: grep keeps or drops each line
      # independently, so the resulting set of lines is the same as sorting
      # first, but `sort -u` only has to handle the matching lines instead of
      # the whole dump.
      tsv = TSV.open(CMD.cmd("cut -f 1,2,5,6 | grep -w -F -f #{ tmpfile } | sort -u", :in => InterPro.source.protein2ipr.open, :pipe => true), :merge => true, :type => :double)
    end

    tsv.key_field = uniprot_field
    tsv.fields = ["InterPro ID", "Domain Start AA", "Domain End AA"]
    tsv.to_s
  end

  # Produces the table labelled "InterPro ID" -> "Domain Name" from columns
  # 2 and 3 of the dump.
  InterPro.claim InterPro.domain_names, :proc do
    # Earlier approach, kept for reference:
    #tsv = InterPro.source.protein2ipr.tsv :key_field => 1, :fields => [2], :type => :single
    tsv = TSV.open(CMD.cmd("cut -f 2,3 | sort -u", :in => InterPro.source.protein2ipr.open, :pipe => true), :merge => true, :type => :single)

    tsv.key_field = "InterPro ID"
    tsv.fields = ["Domain Name"]
    tsv.to_s
  end

  # Memoised, persisted TSV of the domain_names resource.
  def self.name_index
    @@name_index ||= InterPro.domain_names.tsv(:persist => true, :unnamed => true)
  end

  # Memoised, persisted flat index: "InterPro ID" -> "UniProt/SwissProt Accession".
  def self.gene_index
    @@gene_index ||= InterPro.protein_domains.tsv(:persist => true, :key_field => "InterPro ID", :fields => ["UniProt/SwissProt Accession"], :type => :flat, :merge => true, :unnamed => true)
  end

  # Memoised, persisted index: "UniProt/SwissProt Accession" -> "InterPro ID".
  def self.domain_index
    @@domain_index ||= InterPro.protein_domains.tsv(:persist => true, :unnamed => true, :key_field => "UniProt/SwissProt Accession", :fields => ["InterPro ID"], :merge => true)
  end

  # Memoised, persisted double index: "UniProt/SwissProt Accession" ->
  # "InterPro ID", "Domain Start AA", "Domain End AA".
  def self.domain_position_index
    @@domain_position_index ||= InterPro.protein_domains.tsv(:persist => true, :unnamed => true, :key_field => "UniProt/SwissProt Accession", :fields => ["InterPro ID", "Domain Start AA", "Domain End AA"], :type => :double, :merge => true)
  end

  # Index "Ensembl Protein ID" -> "UniProt/SwissProt Accession" for the given
  # organism; one index is built and cached per organism code.
  def self.ens2uniprot(organism)
    @@ens2uniprot_index ||= {}
    @@ens2uniprot_index[organism] ||= Organism.protein_identifiers(organism).tsv(:persist => true, :unnamed => true, :fields => ["UniProt/SwissProt Accession"], :key_field => "Ensembl Protein ID", :type => :double, :merge => true)
  end

end

if defined? Entity 
  # Entity wrapper for InterPro domain identifiers ("InterPro ID" format),
  # annotated with an organism.
  module InterProDomain
    extend Entity
    self.format = "InterPro ID"

    self.annotation :organism

    # Looks each domain up in InterPro.name_index.
    property :description => :array2single do
      InterPro.name_index.values_at(*self)
    end

    # Same lookup as :description; both read InterPro.name_index.
    property :name => :array2single do
      InterPro.name_index.values_at(*self)
    end

    # For each domain, the unique accessions listed in InterPro.gene_index,
    # set up as Protein entities in "UniProt/SwissProt Accession" format.
    property :proteins => :array2single do
      InterPro.gene_index.values_at(*self).collect{|accessions|
        # A domain missing from the index yields nil; treat it as "no
        # proteins", matching what :genes does, instead of failing on nil.uniq.
        accessions = [] if accessions.nil?
        accessions = accessions.uniq
        accessions.organism = organism if accessions.respond_to? :organism
        accessions
      }.tap{|o| Protein.setup(o, "UniProt/SwissProt Accession", organism)}
    end

    # Same lookup as :proteins, but the result is set up through Gene (still
    # using the "UniProt/SwissProt Accession" format).
    property :genes => :array2single do
      InterPro.gene_index.values_at(*self).collect{|accessions|
        accessions = [] if accessions.nil?
        accessions = accessions.uniq
        accessions.organism = organism if accessions.respond_to? :organism
        accessions
      }.tap{|o| Gene.setup(o, "UniProt/SwissProt Accession", organism)}
    end
  end

  # Only extend Protein when it is already loaded as an Entity. The low
  # precedence `and` is deliberate: with `&&` the whole conjunction would
  # become the operand of `defined?`.
  if defined? Protein and Entity === Protein
    module Protein
      # For each protein: translate it through InterPro.ens2uniprot and
      # collect the unique InterPro IDs of all matching accessions. Yields an
      # empty InterProDomain array for a nil protein and nil when the protein
      # has no accession in the index.
      property :interpro_domains => :array2single do
        self.collect do |protein|
          if protein.nil?
            # Same handling as interpro_domain_positions: a nil entry would
            # otherwise fail on protein.organism.
            [].tap{|o| InterProDomain.setup(o, organism)}
          else
            uniprot = (InterPro.ens2uniprot(protein.organism)[protein] || []).flatten
            uniprot.empty? ? nil : 
              InterPro.domain_index.values_at(*uniprot).compact.flatten.  each{|pth| pth.organism = organism if pth.respond_to? :organism }.uniq.tap{|o| InterProDomain.setup(o, organism)}
          end
        end
      end

      # For each protein: the entries of InterPro.domain_position_index for
      # its accessions, flattened one level. Yields an empty InterProDomain
      # array for a nil protein and nil when the protein has no accession in
      # the index.
      property :interpro_domain_positions => :array2single do
        self.collect do |protein|
          if protein.nil?
            [].tap{|o| InterProDomain.setup(o, organism)}
          else
            uniprot = (InterPro.ens2uniprot(protein.organism)[protein] || []).flatten
            uniprot.empty? ? nil : 
              InterPro.domain_position_index.values_at(*uniprot).compact.flatten(1).tap{|o| InterProDomain.setup(o, organism)}
          end
        end
      end
    end
  end
end