require 'rubygems'
require 'ruby-debug'
require File.expand_path(File.join(File.dirname(__FILE__), 'obo_parser')) 

module OboParser::Utilities

  # Prints, for each Term id, the labels used for that id across the given ontologies, in a two column tab delimited format.
  # Only ids with more than +cutoff+ distinct labels are reported (e.g. a cutoff of 1 lists only the ids whose labels differ).
  # Does not (yet) include synonyms; this could easily be extended.
  #
  #== Example use
  #		of1 = File.read('foo1.obo')	
  #		of2 = File.read('foo2.obo')	
  #		of3 = File.read('foo3.obo')	
  #		of4 = File.read('foo4.obo')	
  # 
  #  OboParser::Utilities.dump_comparison_by_id(0,[of1, of2, of3, of4])
  #
  # @param [Integer] cutoff only Term ids with > cutoff distinct labels will be reported
  # @param [Array] files an Array of read files
  # @return [String, Array] '' when no files are given, otherwise the sorted Term ids; the tab delimited report itself is printed to STDOUT
  def self.dump_comparison_by_id(cutoff = 0, files = [])
    return '' if files.empty?

    # Parse every file before building the comparison.
    ontologies = files.map { |contents| parse_obo_file(contents) }

    # id => every label recorded for that id, one entry per ontology that defines it
    labels_by_id = {}
    ontologies.each do |ontology|
      ontology.id_hash.each do |id, label|
        (labels_by_id[id] ||= []) << label
      end
    end

    # The value of the method is the sorted id list that is iterated here.
    labels_by_id.keys.sort.each do |id|
      distinct = labels_by_id[id].uniq
      next unless distinct.size > cutoff
      puts "#{id}\t#{distinct.join(', ')}"
    end
  end

  # Prints the labels that are found in every one of the passed ontologies, followed by a count. Does not yet include synonyms.
  #
  #== Example use
  #  of1 = File.read('fly_anatomy.obo')	
  #  of2 = File.read('hao.obo')	
  #  of3 = File.read('mosquito_anatomy.obo')	
  # 
  #  OboParser::Utilities.shared_labels([of1, of3])
  #
  # @param [Array] files an Array of read files
  # @return [nil] the labels are printed to STDOUT, one per line
  def self.shared_labels(files = [])
    # normalized label => number of ontologies that contain it
    comparison = Hash.new(0)

    files.each do |f|
      o = parse_obo_file(f)

      # Remove both life-stage qualifiers from the same string. Restarting from the raw
      # label for the second removal would throw the first one away.
      labels = o.term_hash.keys.map do |k|
        k.gsub(/adult/, '').gsub(/embryonic\/larval/, '').strip
      end

      # Count a label once per ontology, so that two variants collapsing to the same label
      # inside one file cannot pass for a label shared between files.
      labels.uniq.each { |label| comparison[label] += 1 }
    end

    # A label is shared when every ontology contributed it.
    match = comparison.keys.select { |k| comparison[k] == files.size }

    puts match.sort.join("\n")
    puts "\n#{match.length} total."
  end


  #== Two column translation tools

# Preamble of a :homolonto report: the OBO header tags followed by the OGEE:has_member
# [Typedef] stanza. column_translate starts its :homolonto output from this text.
HOMOLONTO_HEADER = %{
format-version: 1.2
auto-generated-by: obo_parser
default-namespace: fix_me

[Typedef]
id: OGEE:has_member
name: has_member
is_a: OBO_REL:relationship
def: "C has_member C', C is an homology group and C' is a biological object" []
comment: "We leave open the possibility that an homology group is a biological object. Thus, an homology group C may have C' has_member, with C' being an homology group."
is_transitive: true
is_anti_symmetric: true

}


  # Takes a two column input file, references it to two ontologies, and provides a report.
  #  
  #== Example use
  #  file = File.read('HAO_TGMA_list.txt')
  #  col1_obo = File.read('hao.obo')
  #  col2_obo = File.read('tgma.obo')
  #  
  #  OboParser::Utilities.column_translate(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo, :output => :homolonto)
  #== Output types
  # There are several output report types
  #   :xls - Translates the columns in the data_file to the option passed in :translate_to, the first matching against col1_obo, the second against col2_obo.  Returns an Excel file.
  #   :homolonto - Generates a homolonto compatible file to STDOUT
  #   :cols - Prints a two column format to STDOUT
  #
  # @param [Hash] options options.
  # @option options [String] :data the contents of the two column, tab delimited data file.
  # @return [true] the translation is printed to STDOUT (:cols, :homolonto) or written to an Excel file (:xls).
  def self.column_translate(options = {})
    opt = {
      :data => nil,
      :col1_obo => nil,
      :col2_obo => nil,
      :translate_to => :id,        # also :label
      :output => :cols,            # also :xls, :homolonto (:parent_match is not implemented here)
      :parent_match_to => :is_a,   # accepted, but not read by this method
      :output_filename => 'foo',
      :index_start => 0
    }.merge!(options)

    c1obo = parse_obo_file(opt[:col1_obo])
    c2obo = parse_obo_file(opt[:col2_obo])

    case opt[:output]
    when :xls
      Spreadsheet.client_encoding = 'UTF-8'
      book = Spreadsheet::Workbook.new
      sheet = book.create_worksheet
    when :homolonto
      # Append to a copy. Appending to the constant itself would leave this call's stanzas
      # in HOMOLONTO_HEADER and repeat them in every later report.
      s = HOMOLONTO_HEADER.dup
      opt[:translate_to] = :id # force this in this mode
    end

    # Converts one cell. Anything containing a colon is taken to be an id ('FOO:123'),
    # anything else a label ('head'); the ontology of the cell's column does the lookup.
    translate = lambda do |value, obo|
      is_id = value.include?(':')
      if opt[:translate_to] == :id
        is_id ? value : obo.term_hash[value]
      else
        is_id ? obo.id_hash[value] : value
      end
    end

    i = opt[:index_start]

    opt[:data].split(/\n/).each do |row|
      i += 1 # blank and single-column rows consume an index too
      c1, c2 = row.split(/\t/).map(&:strip)

      if c1.nil? || c2.nil?
        puts
        next
      end

      v1 = translate.call(c1, c1obo)
      v2 = translate.call(c2, c2obo)

      case opt[:output]
      when :cols
        puts "#{v1}\t#{v2}"
      when :xls
        sheet[i, 0] = v1
        sheet[i, 1] = term_stanza_from_file(v1, opt[:col1_obo])
        sheet[i, 2] = v2
        sheet[i, 3] = term_stanza_from_file(v2, opt[:col2_obo])
      when :homolonto
        # The stanza is named after the column one label and lists both ids as members.
        s << homolonto_stanza(i, c1obo.id_hash[v1], v1, v2)
        s << "\n\n"
      end
    end

    case opt[:output]
    when :xls
      book.write "#{opt[:output_filename]}.xls"
    when :homolonto
      puts s + "\n"
    end

    true
  end

  # Takes a two column input file, references it to two ontologies, and returns a hash
  #  
  #== Example use
  #  file = File.read('HAO_TGMA_list.txt')
  #  col1_obo = File.read('hao.obo')
  #  col2_obo = File.read('tgma.obo')
  #  
  #  OboParser::Utilities.hashify_pairs(:data => file, :col1_obo => col1_obo, :col2_obo => col2_obo)
  #
  # @param [Hash] options options.
  # @param [Symbol] data the two column data file.
  # @param [Symbol] col1_obo the OBO file referenced in the first column
  # @param [Symbol] col2_obo the OBO file referenced in the second column
  # @return [Hash] a hash of {id string => id string}
  def self.hashify_pairs(options = {})
    opt = {
      :data => nil,
      :col1_obo => nil,
      :col2_obo => nil,
    }.merge!(options)

    c1obo = parse_obo_file(opt[:col1_obo])
    c2obo = parse_obo_file(opt[:col2_obo])

    # Ids contain a colon ('FOO:123') and are kept as given; labels ('head') are looked up
    # in the ontology of their column. A label the ontology does not know is kept verbatim,
    # so the row still shows up in the result instead of collapsing onto a nil key.
    to_id = lambda do |value, obo|
      value.include?(':') ? value : (obo.term_hash[value] || value)
    end

    pairs = {}

    # No row counter is kept: nothing here uses one, and this method has no :index_start
    # default to start one from.
    opt[:data].split(/\n/).each do |row|
      c1, c2 = row.split(/\t/).map(&:strip)
      next if c1.nil? || c2.nil? # blank line or single-column row

      # Store the translated values, so that the result is id => id as documented.
      pairs[to_id.call(c1, c1obo)] = to_id.call(c2, c2obo)
    end

    pairs
  end


  # Returns a HomolOnto Stanza  
  #
  # @param [String] id an externally tracked id for the id: tag like '00001' 
  # @param [String] name a name for the name: tag
  # @param [Array<String>] members 2 or more members for the relationship: has_member tags, given as trailing arguments like 'FOO:123', 'BAR:456'
  # @return [String] the stanza requested 
  def self.homolonto_stanza(id, name, *members)
    # Members may arrive as separate arguments (as column_translate passes them) or as one
    # Array (as the parameter documentation describes); flattening puts both call forms
    # through the same length check.
    members = members.flatten
    return 'NOT ENOUGH RELATIONSHIPS' if members.length < 2

    lines = ['[Term]', "id: HOG:#{id}", "name: #{name}"]
    lines.concat(members.map { |m| "relationship: has_member #{m}" })

    # No trailing newline: the caller decides how stanzas are separated.
    lines.join("\n")
  end


# Takes a Hash of OBO ontology files, an Array of relationships, and writes two input files (a network, and node properties) for Cytoscape
#  
#== Example use
# OboParser::Utilities.cytoscapify(:ontologies => {'HAO' => File.read('input/hao.obo'), 'TADS' => File.read('input/tads.obo'), 'TGMA' => File.read('input/tgma.obo'), 'FBBT' => File.read('input/fbbt.obo') }, :properties => ['is_a', 'part_of'])
#
# @param [Symbol] ontologies a Hash of #read files as values, keys as working names
# @param [Symbol] properties an Array of properties like ['is_a', 'part_of'] 
# TODO: @return File1, File2, Filen  
def self.cytoscapify(options = {})
  opt = {
    :ontologies => {},
    :properties => []
  }.merge!(options)

  # Nothing is written unless there is at least one property and one ontology.
  return false if opt[:properties].empty?
  return false if opt[:ontologies].empty?

  # The block forms close both files even when parsing or writing raises part-way through.
  File.open("nodes.tab", "w+") do |nodes|
    File.open("edges.eda", "w+") do |edges|
      opt[:ontologies].each do |k, contents|
        obo_file = parse_obo_file(contents)

        obo_file.terms.each do |t|
          # node row: term id, term name, working name of the ontology
          nodes.puts [t.id.value, t.name.value, k].join("\t")

          t.relationships.each do |rel, id|
            # edge row: source id, "(relation)", target id -- requested properties only
            edges.puts [t.id.value, "(#{rel})", id].join("\t") if opt[:properties].include?(rel)
          end
        end
      end
    end
  end

  true
end


# Takes a two column input file, references it to two ontologies, and returns a report 
# that identifies data pairs that have parents who are also a data pair given a 
# provided property/relation type.
#  
#== Example use
#  file = File.read('HAO_TGMA_list.txt')
#  col1_obo = File.read('hao.obo')
#  col2_obo = File.read('tgma.obo')
#
# foo = OboParser::Utilities.parents(:data => data, :col1_obo => col1_obo, :col2_obo => col2_obo, :property => 'is_a')
#
# puts "-- NO (#{foo[:no].size})\n" 
# puts foo[:no].join("\n")
# puts "-- YES (#{foo[:yes].size})\n" 
# puts foo[:yes].join("\n")
#
# @param [Hash] options options.
# @param [Symbol] data the two column data file.
# @param [Symbol] col1_obo the OBO file referenced in the first column
# @param [Symbol] col2_obo the OBO file referenced in the second column
# @param [Symbol] property the OBO relationship/property to check against (e.g. 'is_a', 'part_of')
# @return [Hash, false] a hash of {:yes => [], :no => [], :unplaced => []} whose Arrays hold "id -> id" Strings, or false when no :property is given
def self.parents(options = {})
  opt = {
    :data => nil,
    :col1_obo => nil,
    :col2_obo => nil,
    :property => nil
  }.merge!(options)

  return false if opt[:property].nil?
  c1obo = parse_obo_file(opt[:col1_obo])
  c2obo = parse_obo_file(opt[:col2_obo])

  result = {:yes => [], :no => [], :unplaced => []}

  # :index_start is passed explicitly because hashify_pairs declares no default for it.
  hash = hashify_pairs(:data => opt[:data], :col1_obo => opt[:col1_obo], :col2_obo => opt[:col2_obo], :index_start => 0)

  obo1_hash = c1obo.id_index
  obo2_hash = c2obo.id_index

  # Ids that the given term points to through the requested property.
  parent_ids = lambda do |term|
    ids = []
    term.relationships.each { |rel, id| ids.push(id) if rel == opt[:property] }
    ids
  end

  hash.each do |a, b|
    unless obo1_hash[a]
      puts "can't find #{a}\n"
      next
    end

    # Report the column two id here: it is the one that is missing.
    unless obo2_hash[b]
      puts "can't find #{b}\n"
      next
    end

    ids_1 = parent_ids.call(obo1_hash[a])
    ids_2 = parent_ids.call(obo2_hash[b])

    t = "#{a} -> #{b}"

    # Each pair is filed exactly once:
    #   :unplaced - one of the two terms has no parent through the property, nothing to compare
    #   :yes      - at least one parent of a is paired (in the data) with a parent of b
    #   :no       - both terms have parents, but none of them are paired with each other
    if ids_1.empty? || ids_2.empty?
      result[:unplaced].push(t)
    elsif ids_1.any? { |c| ids_2.any? { |d| hash[c] == d } }
      result[:yes].push(t)
    else
      result[:no].push(t)
    end
  end

  result
end


#== Helper methods that don't require the obo_parser library

  # Given a Term id and a String representing an OBO file returns that stanza. 
  #
  # @param [String] id a Term id like 'FOO:123' 
  # @param [String] file an OBO file as a String, like File.read('my.obo')
  # @return [String] the stanza requested 
  def self.term_stanza_from_file(id, file)
    # An empty id would interpolate to nothing and match the first [Term] stanza in the file.
    return "" if id.to_s.empty?

    # - Regexp.escape: the id is matched literally, not as a pattern.
    # - (?!\S): the id must be the whole value, so 'FOO:12' cannot match 'FOO:123'.
    # - (?=^\[|\z): the stanza runs up to the next stanza header of any kind, or to the end
    #   of the file, so the last stanza in the file can be returned as well.
    stanza = file.to_s[/^\[Term\]\s*?id:\s*?#{Regexp.escape(id.to_s)}(?!\S).*?(?=^\[|\z)/im]

    # Normalize Windows line endings; a stanza that was not found becomes "".
    stanza.to_s.gsub(/\r\n/, "\n")
  end

end