module MS ; end
module MS::Quant ; end

class MS::Quant::Qspec

  # personal communication with Hyungwon Choi: "We typically use nburn=2000,
  # niter=10000, which is quite sufficient to guarantee the reproducibility of
  # results using the same data."
  NBURNIN = 2000
  NITER = 10000
  INIT_HEADER = %w(protid protLen)
  DELIMITER = "\t"

  SUBMITTED_TO_QSPEC = 'submitted_to_qspec.txt'

  # takes an ordered list of conditions ['cond1', 'cond1', 'cond2', 'cond2'] and
  # returns an array of ints [0,0,0,1,1,1...]
  def self.conditions_to_ints(conditions)
    i = 0
    current_condition = conditions.first
    conditions.map do |cond| 
      if current_condition == cond ; i
      else
        i += 1
        current_condition = cond
        i
      end
    end
  end

  # returns an array of Results structs which is each row of the returned file
  # works with V2 of QSpec
  def self.results_array(resultsfile)
    rows = IO.readlines(resultsfile).map {|line| line.chomp.split("\t") }
    headers = rows.shift
    start_bayes = headers.index {|v| v =~ /BayesFactor/i }
    rows.map do |row| 
      data = [row[0]]
      data.push( row[1...start_bayes].map(&:to_f) )
      data.push( *row[start_bayes,4].map(&:to_f) )
      data.push( row[start_bayes+4] )
      Results.new(*data)
    end
  end

  # returns the right executable based on the array of conditions
  def self.executable(conditions)
    biggest_size = conditions.group_by {|v| v }.values.map(&:size).max
    (biggest_size >= 3) ? 'qspecgp' : 'qspec'
  end

  # protname_length_pairs is an array of doublets: [protname, length]
  # condition_to_count_array is an array doublets: [condition, array_of_counts]
  def initialize(protname_length_pairs, condition_to_count_array)
    @protname_length_pairs = protname_length_pairs
    @condition_to_count_array = condition_to_count_array
  end

  def conditions
    @condition_to_count_array.map(&:first)
  end

  # writes a qspec formatted file to filename
  def write(filename)
    ints = MS::Quant::Qspec.conditions_to_ints(conditions)
    header_cats = INIT_HEADER + ints
    rows = @protname_length_pairs.map {|pair| pair.map.to_a }
    @condition_to_count_array.each do |cond,counts|
      rows.zip(counts) {|row,cnt| row << cnt }
    end
    File.open(filename,'w') do |out|
      out.puts header_cats.join(DELIMITER)
      rows.each {|row| out.puts row.join(DELIMITER) }
    end
  end

  # returns an array of Qspec::Results objects (each object can be considered
  # a row of data)
  def run(normalize=true, opts={})
    puts "normalize: #{normalize}" if $VERBOSE
    tfile = Tempfile.new("qspec")
    write(tfile.path)
    if opts[:keep]
      local_file = File.join(Dir.pwd,File.basename(tfile.path))
      FileUtils.cp(tfile.path, local_file, :verbose => $VERBOSE)
      puts "(copy of) file submitted to qspec: #{local_file}" if $VERBOSE
    end
    qspec_exe = self.class.executable(conditions)
    cmd = [qspec_exe, tfile.path, NBURNIN, NITER, (normalize ? 1 : 0)].join(' ')
    if $VERBOSE
      puts "running #{cmd}" if $VERBOSE
    else
      cmd << " 2>&1"
    end
    reply = `#{cmd}`
    puts reply if $VERBOSE
    outfile = tfile.path + '_' + qspec_exe
    results = self.class.results_array(outfile)
    if opts[:keep]
      local_outfile = File.join(Dir.pwd, File.basename(outfile))
      FileUtils.cp(outfile, local_outfile, :verbose => $VERBOSE)
      puts "(copy of) file returned from qspec: #{outfile}"
    end
    tfile.unlink
    results
  end

  # for version 2 of QSpec
  # counts array is parallel to the experiment names passed in originally
  Results = Struct.new(:protid, :counts_array, :bayes_factor, :fold_change, :rb_stat, :fdr, :flag)
end