require 'spec_id'
require 'optparse'
require 'ostruct'
require 'spec_id/aa_freqs'
require 'shuffle'
require 'vec'
require 'table'


########################################################
WRITE_CYS_FIND = false
########################################################


module SpecID
  attr_accessor :orig_peps, :passed_peps, :passed_prots
  # The filename passed in for filtering
  attr_accessor :passed_in_filename

  # returns the top peptide hits per file dta (first_scan + charge)
  # all hits with same score as top score are returned
  # assumes that all fields are strings...
  # converts xcorr, deltacn, deltamass, mass, and charge into numerical types
  # deletes the protein array (but not relevant proteins)
  # hashes on [pep.basename, pep.first_scan.to_i, pep.charge.to_i]
  # sets the @orig_peps attribute to those passing
  def top_peps_prefilter!
    ## Bioworks peps are text based and need to be transformed first
    if peps.first.is_a? Bioworks::Pep 
      peps.each do |pep|
        pep.xcorr = pep.xcorr.to_f
        pep.deltacn = pep.deltacn.to_f
        pep.deltamass = pep.deltamass.to_f
        pep.mass = pep.mass.to_f
        pep.charge = pep.charge.to_i
        pep.first_scan = pep.first_scan.to_i
      end
    end
    ## Srf Peps need no transformation!

    # get the top peptide by firstscan/charge (equivalent to .out files)
    top_peps = []
    self.peps.hash_by {|pep| [pep.base_name, pep.first_scan, pep.charge]}.values.map do |v|
      #self.peps.hash_by {|pep| [pep.aaseq, pep.charge]}.values.map do |v|
      best_to_worst = v.sort_by {|pep| pep.xcorr}.reverse
      top_score = best_to_worst.first.xcorr
      best_to_worst.each do |pep|
        if pep.xcorr == top_score
          top_peps << pep
        else ; break
        end
      end
    end
    @orig_peps = top_peps
  end

  # (xcorr1, xcorr2, xcorr3, deltacn, ppm)
  # interface very unstable.  For now, keeping it very loose...
  # assumed that peptide xcorr, deltacn, deltamass, mass, ppm are Floats
  # assumed that peptide charge is Integer
  # returns peps_passed
  # must respond to 'peps'
  # DOES NOT UPDATE the prot.peps attribute!!
  def filter_sequest(args, include_deltacnstar=false)
    (x1, x2, x3, deltacn, ppm) = args
    self.peps.select do |pep|
      # have to add the upper limit to deltacn because the lowest score is often
      # assigned a 1.10 in bioworks!
      pep_deltacn = pep.deltacn
      pep_charge = pep.charge

      ## The outer parentheses are critical to getting the correct answer!
      passing = ( (pep_deltacn >= deltacn) and ((pep_charge == 1 && pep.xcorr >= x1) or (pep_charge == 2 && pep.xcorr >= x2) or (pep_charge == 3 && pep.xcorr >= x3)) and ( pep.ppm <= ppm ))

      if passing
        if !include_deltacnstar && pep_deltacn > 1.0
          false
        else
          true
        end
      else
        false
      end
    end
  end


  # given some list of SpecID::Pep based objects, finds the list of proteins
  # associated with those peptides
  # update_prot_peps => when true, updates prot.peps attribute given the list
  # of pephits
  # kind = 
  # :no_update (current proteins are returned, but their peps attribute
  # is not updated)
  # :update (current proteins returned with peps attribute updated)
  # :new (new proteins are created complete with peps attribute)
  def self.passing_proteins(pephits, kind=:no_update)

    orig_pephits_prts = []
    if kind == :new
      new_prots = {}
      pephits.each_with_index do |pep,i|
        orig_pephits_prts[i] = pep.prots
        peps_new_prts = pep.prots.map do |prt|
          if new_prots.key? prt.reference
            already_exists = new_prots[prt.reference]
          else
            np = prt.dup
            np.peps = []
            new_prots[np.reference] = np
            np
          end
        end
        pep.prots = peps_new_prts
      end
    end

    if kind == :update
      pephits.each do |pep|
        pep.prots.each do |prt|
          prt.peps = []
        end
      end
    end

    prot_set = {}
    pephits.each do |pep|
      prts = pep.prots
      prts.each do |prt|
        prot_set[ prt.reference ] = prt 
      end
      if (kind == :update || kind == :new)
        prts.each do |prt|
          prt.peps << pep
        end
      end
    end

    ## Reset the original protein hits
    if kind == :new
      pephits.each_with_index do |pep,i|
        pep.prots = orig_pephits_prts[i]
      end
    end

    prot_set.values
  end
end


class SpecID::Filter

  NUM_PROT_FPPR_ITERATIONS = 10

  def self.run_from_argv(argv)
    obj = self.new
    obj.run_from_argv(argv)
  end

  def run_from_argv(argv)
    reply = get_options(argv) 
    return unless reply
    files, opt = reply

    #files = ARGV.map {|file| file }
    #ARGV.clear

    $stderr.puts "reading files (can take a minute or two for large files)..." if $VERBOSE
    spec_ids = files.map do |file|
      spec_id = file_to_prefiltered_spec_id(file, opt)
      spec_id
    end

    ## the options hash
    hash = {} 
    if opt.cys
      if opt.cys[1]
        opt.cys[1] = opt.cys[1].to_f
      else
        opt.cys[1] = 0.0
      end
      hash[:cys] = opt.cys
    end


    hash[:tps] = 
      if opt.tps
        Fasta.new.read_file(opt.tps).prots.map do |prot|
          prot.aaseq.chomp
        end
      end

    hash[:dcy] = 
      if opt.false
        new_spec_ids = []
        prefixes_or_files = SpecID.extend_args(opt.false, files.size)
        false_spec_ids = spec_ids.zip(prefixes_or_files).map do |spec_id, prefix_or_file|
          if File.exist? prefix_or_file
            new_spec_ids << spec_id
            file_to_prefiltered_spec_id(prefix_or_file, opt)
          else 
            (tps, fps) = spec_id.classify_by_false_flag(:peps, prefix_or_file, true, opt.prefix)
            fps_specid = spec_id.class.new
            tps_specid = spec_id.class.new

            fps_specid.peps = fps
            tps_specid.peps = tps
            new_spec_ids << tps_specid
            fps_specid
          end
        end
        spec_ids = new_spec_ids
        false_spec_ids 
      end

    defaults = {
      :dcy => nil, # { spec_id => false_spec_id }
      :cys => nil, # [cys_background_freq, cys_containing_freq]
      :tps => nil,
      :tmm => nil,
      :occams_razor => opt.occams_razor,
    }
    args = defaults.merge hash


    base_args = [opt.x1, opt.x2, opt.x3, opt.c, opt.ppm]

    #################################################### <-- 
    @fppr_methods = [:tmm, :tps, :cys, :dcy].select do |x|
      args[x]
    end
    @groups_reporting = [:pephits, :aaseq, :prothits]
    @groups_reporting.push( :occams_razor ) if args[:occams_razor]

    @cat_labels = {
      :pephits => 'pep_hits', 
      :prothits => 'prot_hits',
      :aaseq => 'uniq_aa_hits',
      :occams_razor => 'occams_prot_hits',
    }
    #################################################### <-- 

    if opt.log
      @logfh = File.open(opt.log, 'w')
    else
      @logfh = nil
    end
    #########################################
    # PRINT FILTER LEGEND
    out filter_legend(@fppr_methods)
    #########################################

    if opt.filters_file
      lines = IO.readlines(opt.filters_file)
      lines.each do |line|
        line.chomp!
        answer = prep_reply(line, base_args)
        next if answer == false
        base_args = answer
        filter_round(spec_ids, base_args, args)
      end
    elsif opt.i
      ## CLEAR ARGV (since otherwise, gets reads it!)
      ARGV.clear
      out interactive_help
      reply = "nil"
      loop do
        b = base_args
        out "#{b[0]} #{b[1]} #{b[2]} dcn:#{b[3]} ppm:#{b[4]}"
        loop do
          reply = gets.chomp
          answer = prep_reply(reply, base_args)
          if answer == false
            out interactive_help
          else
            base_args = answer
            filter_round(spec_ids, base_args, args)
            break
          end
        end
      end
    else
      filter_round(spec_ids, base_args, args)
    end

    if opt.log
      @logfh.close
    end

  end

  def out(string)
    puts string
    if @logfh
      @logfh.puts string
    end
  end

  # takes a fasta file or a string ( to be cast as a float )
  def get_cys_freq(arg)
    if File.exist? arg
      SpecID::AAFreqs.new(arg).aafreqs[:C]
    else
      arg.to_f
    end
  end

  # prints shortened number for display
  def short(num)
    sprintf( "%.3f",num)
  end

  # if good arguments, returns [files_array, options]
  # else prints an error argument and returns nil
  def get_options(argv)
    dup_argv = argv.dup

    opt = OpenStruct.new
    opt.x1 = 1.0
    opt.x2 = 1.5
    opt.x3 = 2.0
    opt.c = 0.1
    opt.ppm = 1000.0
    opt.false = false

    opts = OptionParser.new do |op|
      op.banner = "usage: #{File.basename(__FILE__)} [OPTS] <bioworks.xml | bioworks.srg>"
      op.separator("prints number of peptides/proteins ID'd at given thresholds")
      op.separator "only top hit (by xcorr) per scan+charge is considered"

      #op.separator("** 'dcn*' is the number of peptides with deltacn == 1.1")
      #op.separator("   (these are peptides who are the only hit with xcorr > 0)")
      op.separator ""
      op.on("-1", "--xcorr1 N", Float, "xcorr for +1 charge  d: #{opt.x1}") {|v| opt.x1 = v}
      op.on("-2", "--xcorr2 N", Float, "xcorr for +2 charge  d: #{opt.x2}") {|v| opt.x2 = v}
      op.on("-3", "--xcorr3 N", Float, "xcorr for +3 charge  d: #{opt.x3}") {|v| opt.x3 = v}
      op.on("-c", "--deltacn N", Float, ">= deltacn  d: #{opt.c}") {|v| opt.c = v} 
      op.on("-p", "--ppm N", Float,     "<= ppm               d: #{opt.ppm}") {|v| opt.ppm = v} 
      op.separator "                                     if bioworks.xml, = 10^6deltamass/mass"
      op.on("-i", "--interactive", "interactive filtering") {|v| opt.i = v} 
      op.on("-f", "--false a,b,c", Array, "flag for false proteins or filenames of decoys") {|v| opt.false = v} 
      op.separator("                                     e.g., for Bioworks: 'REVERSE'")
      op.separator("                                     (last given will apply to remaining files)")
      op.on("--prefix", "match false flag for prefixes only") {|v| opt.prefix = v} 
      op.on("-y", "--cys <fasta_file|freq,[bkg]>", Array, "report fpr by expected cysteine freq") do |v| 
        v[0] = get_cys_freq(v[0])
        opt.cys = v
      end
      op.separator("                                     freq = freq of cysteine as amino acid")
      op.separator("                                     [bkg] = freq of cys containing peps  d: 0.0")
      op.on("--filters_file <file>", "(no -i) file with list of interactive input") {|v| opt.filters_file = v}
      op.on("-t", "--tps <fasta>", "fasta file containing true hits") {|v| opt.tps = v }
      #op.on("--tmm <toppred.out>", "toppred.out file with transmembr. topology") {|v| opt.tps = v }
      op.on("--yaml", "spits out yaml-ized data") {|v| opt.tabulate = v }
      op.on("--combined_score", "shows the combined score") {|v| opt.combined_score = v }
      op.on("--marshal", "will write marshaled data or read existing") {|v| opt.marshal = v }
      op.on("--log <file>", "also writes all output to file") {|v| opt.log = v }
      ## NEED TO IMPLEMENT THIS:
      #op.on("--protein_summary", "writes passing proteins to .summary.html files") {|v| opt.protein_summary = v }
      op.on("-z", "--occams_razor", "will show minimal set of proteins") {|v| opt.occams_razor = v }
    end

    opts.parse!(dup_argv)

    if dup_argv.size < 1
      puts opts
      return nil
    end

    [dup_argv, opt]
  end

  # (actual # with cys, expected # with cys, total#peptides,
  # mean_fraction_of_cysteines_true, std)
  # PepHit(C) = Peptide containing cysteine
  #   # Total PepHit(C)                   # Observed Bad Pep (C)
  #   ------------------ proportional_to  ---------------------- 
  #   # Total PepHit                      # Total Bad PepHit (X)
  #  returns the fppr and the total number false
  def fppr_by_cysteines(ac_num_with_cys, exp_num_with_cys, total_peptides, mean_fraction_true_cys=nil, std_fraction_true_cys=nil) 

    # the number of bona fide BAD cysteine hits
    # (some of the cysteine hits (~5%) are true positives)

    ac_num_with_cys -= exp_num_with_cys * mean_fraction_true_cys if mean_fraction_true_cys
    if ac_num_with_cys < 0.0 ; ac_num_with_cys = 0.0 end
    total_number_false = (ac_num_with_cys * total_peptides).to_f/exp_num_with_cys
    fppr = total_number_false / total_peptides
    [fppr, total_number_false]
  end

  # num_peps_per_protein is an array of the number of peptides per protein hit
  # (these are the true hits)
  # assumes that the number follows a gaussian distribution (binomial
  # distributions tend toward gaussians, I believe, at large N)
  # returns [mean_num_wrong, mean_fppr, stdev_num_wrong, stdev_fppr] fppr
  def protein_fppr( num_peps_per_protein, number_false_peptides, num_iterations=10)

    ## Check for more false peptides than peptides in our proteins:
    total_protein_peps = 0
    contained = num_peps_per_protein.each do |num|
      total_protein_peps += num
    end
    ## All peptides will be wrong every time!
    ## which means all proteins will be wrong every time!
    if number_false_peptides >= total_protein_peps
      # [all proteins wrong, fppr=1.0
      return [num_peps_per_protein.size, 1.0, 0.0, 0.0]
    end


    num_prots = num_peps_per_protein.size
    sample = VecD.new(num_iterations)
    # indexed by peptide_number, pointing to a protein's peptide_count
    # we shuffle the indices and then walk along until we are finished
    # then we count how many proteins still have peptides

    # we create an array to hold the peptide number for each protein, then we
    # can reference the same entity when subtracting the peptides in the
    # algorithm
    cont_pep_num_per_prot_ars = (0...num_iterations).map do |i|
      total_protein_peps = 0
      contained = num_peps_per_protein.map do |num|
        [num]
      end
    end

    cont_num_by_pep_index_ars = cont_pep_num_per_prot_ars.map do |ar|
      index_count = 0
      pc_ar = []
      ar.each do |contained_num|
        contained_num.first.times do
          pc_ar[index_count] = contained_num
          index_count += 1
        end
      end
      pc_ar
    end

    indices = (0...(cont_num_by_pep_index_ars.first.size)).map {|x| x }
     

    (0...num_iterations).each do |i|
      num_false = 0
      indices.shuffle!
      pc = cont_num_by_pep_index_ars[i]
      number_false_peptides.times do |shuffle_index|
        #big_i = indices[shuffle_index]
        pc[indices[shuffle_index]][0] -= 1
      end
      cont_pep_num_per_prot_ars[i].each do |contained_pep_count|
        if contained_pep_count.first == 0
          num_false += 1
        end
      end
      sample[i] = num_false
    end
    (mean_num_wrong, stdev) = sample.sample_stats
    mean_fppr = mean_num_wrong / num_prots
    stdev_fppr = stdev / num_prots
    [mean_num_wrong, mean_fppr, stdev, stdev_fppr]
  end

  # returns [total_number_false, fppr, fraction_expected]
  # also takes a hash of pephits keyed on :aaseq
  def fraction_false_by_cysteines(pephits, cys_bg_freq, cys_containing_freq)
    (ac, exp) = SpecID::AAFreqs.new.actual_and_expected_number_containing_cysteines(pephits, cys_bg_freq)
    fraction_of_expected = ac.to_f/exp

    (cys_fprate, total_num_false) = fppr_by_cysteines(ac, exp, pephits.size, cys_containing_freq)
    [total_num_false, cys_fprate, fraction_of_expected]
  end

  def report_cysteines
    #### UNDERWAY:::
    cys_tps = pep_nums[i] - total_num_false

    puts "CYSTEINE FPR: "
    puts "  (# peps containing >= 1 cysteines)"
    puts "              actual: #{ac}"
    puts "fraction of expected: #{short(fraction_of_expected)}"
    puts "     expected # FP's: " + short(total_num_false)
    puts "      estimated  FPR: " + short( 100.0*cys_fprate ) + " % "

    puts "combined_score = x1 + x2 + x3 + 20.0*deltacn + 4000.0*(1.0/ppm)"
    puts "Combined Score & FPR"
    puts "#{combined_score}\t#{cys_fprate}"
    puts "Combined Score & fraction of expected"
    #puts "#{combined_score} #{fraction_of_expected}"
    to_write_cys_find = ["WRITE_CYS_FIND:", combined_score, fraction_of_expected]
    puts to_write_cys_find.join("\t") if WRITE_CYS_FIND
    puts(['TABULATE:', combined_score, pep_tps, pep_fpr, cys_tps, cys_fprate, '', x1, x2, x3, deltacn, ppm].join("\t")) if opt.tabulate

  end

  def filter_legend(fppr_methods)
    lines = []
    lines << "Note: protein FPPR values are probably optimistic"
    lines << "[this implementation assumes an equal likelihood that a false peptide"
    lines << " comes from a protein with more hits as one with less (which is probably"
    lines << " not the case)]"
    lines << "* = deltacn_star = peptides with deltacn > 1.0 (no sibling hits)"
    if fppr_methods.size > 0
      lines << "Following are methods for determining false identification rate:"
      lines << ['dcy=decoy', 'cys=cysteine', 'tps=known_true_positives'].join(" ")
      ## when tmm is implemented:
      #lines << ['dcy=decoy', 'cys=cysteine', 'tmm=transmembrane', 'tps=known_true_positives'].join(" ")
    end
    lines.join("\n")
  end

  # does this give aafreq from a fasta file?
  #      freq = cysteines.aafreqs[:C]

  # returns  [total_number_false, fppr]
  # pephits can be an array or a hash of peptides keyed on :aaseq
  def fraction_false_by_true_pos(pephits, true_pos_aaseqs_ar)
    if pephits.is_a? Hash
      seqs = pephits.keys
    else
      seqs = pephits.map do |v|
        v.aaseq
      end
    end
    real_tps = 0
    real_fps = 0
    # could also do with partition
    seqs.each do |pep_aaseq|
      if true_pos_aaseqs_ar.any? {|prot_aaseq| prot_aaseq.include? pep_aaseq}
        real_tps += 1
      else
        real_fps += 1
      end
    end
    real_fppr = real_fps.to_f/pephits.size
    [real_fps, real_fppr]
  end

  def filter_spec_id(spec_id, filter_args, args)
    results_hash = {}
    # that second argument is to update protein peptides
    pephits = spec_id.filter_sequest(filter_args)

    results_hash[:prothits] = SpecID.passing_proteins(pephits, :no_update)
    results_hash[:pephits] = pephits
    results_hash[:dcn_cnt] = pephits.select{|v| v.deltacn > 1.0}.size
    # be aware that this is a hash keyed by aaseq and values of arrays of
    # peptides sharing the same aaseq!
    results_hash[:aaseq] = pephits.hash_by(:aaseq)
    results_hash
  end

  # returns [#FP, FPPR]
  def dcy_fppr(pephits, false_pephits)
    fps = false_pephits.size
    [fps, fps.to_f/pephits.size]
  end

  def tmm_fppr(pephits)
    abort "NEED TO IMPLEMENT"
  end

  # returns [#FP, FPPR]
  def cys_fppr(pephits, cys_bg_freq, cys_containing_freq)
    (total_num_false, cys_fprate, fraction_of_expected) = fraction_false_by_cysteines(pephits, cys_bg_freq, cys_containing_freq)
    [total_num_false, cys_fprate]
  end

  def tps_fppr(pephits, true_pos_aaseqs_ar)
    fraction_false_by_true_pos(pephits, true_pos_aaseqs_ar)
  end

  ## methods should be passed in like this 'cysteine' for cysteine_fppr
  ## all methods should return [number_false, fppr]
  ## returns a hash (by method) for each set of pephits 
  ## if :dcy is given as a method, then expects the false pephits array
  def calculate_pep_fppr(pephits_ar, methods, args, false_pephits_ar=nil)
    cnt = 0
    pephits_ar.map do |ph|
      hash = {}
      methods.each do |mth|
        case mth
        when :dcy
          hash[mth.to_sym] = send("#{mth}_fppr".to_sym, ph, false_pephits_ar[cnt])  
        when :cys
          hash[mth.to_sym] = send("#{mth}_fppr".to_sym, ph, *(args[:cys]) )
        when :tps
          hash[mth.to_sym] = send("#{mth}_fppr".to_sym, ph, (args[:tps]) )
        else
          hash[mth.to_sym] = send("#{mth}_fppr".to_sym, ph)  
        end
      end
      cnt += 1
      hash
    end
  end

  # fpr is a SpecID obj that is the false positives
  # cysteines holds an aafreqs object or nil
  def filter_round(spec_ids, filter_args, args)

    # push fpr on the end for the calculations
    ## FILTER the NORMAL spec_id objects
    little_tables = []
    spec_ids.each_with_index do |spec_id, i|
      normal_results = filter_spec_id(spec_id, filter_args, args)

      ## FILTER the FALSE objects (if given)
      false_results = 
        if args[:dcy]
          little_args_hash = args.dup
          false_results = filter_spec_id(args[:dcy][i], filter_args, little_args_hash)
        end

      ## HOW TO CALCULATE FPPR FOR EVERYTHING:
      # pephits     Fpephits C/Tpephits  TPpephits
      # uniqaa      Funiqaa  C/Tuniqaa   TPuniqaa
      # prothits    ProtFPR(Fpephits, prothits)   ProtFPR(C/Tpephits, prothits)   ProtFPR(total-TPpephits, prothits)
      # OccProthits ProtFPR(Funiqaa, OccProthits) ProtFPR(C/Tuniqaa, OccProthits) ProtFPR(total-TPuniqaa, OccProthits)
      # C/T = cystein or Transmembrane method

      ## set up false results array
      if args[:dcy]
        fr_ar = [false_results[:pephits], false_results[:aaseq]]
      else
        fr_ar = nil
      end
      (pephits_fppr_results, aaseq_fppr_results) = calculate_pep_fppr([normal_results[:pephits], normal_results[:aaseq]], @fppr_methods, args, fr_ar)

      ## NORMAL prothits
      ## update prothits peptides
      updated_proteins = SpecID.passing_proteins(normal_results[:pephits], :update)
      pep_cnt_arr = updated_proteins.map {|v| v.peps.size }

      ## update occams prothits
      if args[:occams_razor]
        updated_occams_protein_triplets = SpecID::occams_razor(updated_proteins, true)
        occams_pep_cnt_arr = updated_occams_protein_triplets.map {|v| v[1].size }
        occams_prots = updated_occams_protein_triplets.map {|v| v[0] }
        normal_results[:occams_razor] = occams_prots
      end

      ## note that the original prot.peps arrays are obliterated by this.
      ## we would need to re-update if someone wanted these

      prothits_fppr_results = {}
      occams_results = {}
      @fppr_methods.each do |mth|
        prothits_fppr_results[mth] = protein_fppr(pep_cnt_arr, pephits_fppr_results[mth].first.ceil.to_i, NUM_PROT_FPPR_ITERATIONS)
        occams_results[mth] = protein_fppr(occams_pep_cnt_arr, aaseq_fppr_results[mth].first.ceil.to_i, NUM_PROT_FPPR_ITERATIONS) if args[:occams_razor]
      end

      fppr_results = {
        :pephits => pephits_fppr_results,
        :aaseq => aaseq_fppr_results,
        :prothits => prothits_fppr_results,
      }
      fppr_results[:occams_razor] = occams_results if args[:occams_razor]

      ## CHANGE ALL RESULTS INTO PERCENTAGES:
      fppr_results.each do |bk,hash|
        hash.each do |k,val|
          hash[k][1] = 100.0 * val[1] 
        end
      end
      little_tables[i] = to_table( spec_id, args, normal_results, fppr_results, @groups_reporting, @fppr_methods, @cat_labels)
    end

    out filter_params_string(filter_args, @fppr_methods)
    little_tables.each do |tbl|
      out tbl.to_formatted_string(nil, '  ')
      out "-----------------------------------------------\n"
    end
    #big_table(spec_ids, filter_args, args, normal_results, groups_reporting, fppr_results, cat_labels)
    
  end

  
  def filter_params_string(filter_args, fppr_methods)
    (x1, x2, x3, deltacn, ppm) = filter_args
    st = []
    st << "=========================================================================="
    st << " xcorr(1,2,3) >= #{x1},#{x2},#{x3} || deltacn >= #{deltacn} || ppm <= #{ppm} "
    st << ''
    st.join("\n")
    #st = []
    #st << ["xcorr(1,2,3) >= #{x1},#{x2},#{x3}", "deltacn >= #{deltacn}", "ppm <= #{ppm}"].join("\t")
    #st
  end

  def to_table(spec_id, args, normal_results, fppr_results, groups_reporting, fppr_methods, cat_labels)
    #table is in the form: { column heading => [ values ] }
    
    title = spec_id.passed_in_filename
    col_labels = ['num', *(fppr_methods.map{|v| "#{v}%" })]

    row_labels = groups_reporting.map {|grp| cat_labels[grp]}
    dt = groups_reporting.map do |grp|
      line = [normal_results[grp].size]
      fppr_methods.each do |mth|
        line << fppr_results[grp][mth][1]
      end
      line
    end

    Table.new(dt, row_labels, col_labels, title)
    #puts(['TABULATE:', combined_score, pep_tps, pep_fppr, real_tps, real_fppr, '', x1, x2, x3, deltacn, ppm].join("\t")) if opt.tabulate
  end

  def combined_score(filter_args)
    (x1, x2, x3, deltacn, ppm) = filter_args
    combined_score = x1 + x2 + x3 + 20.0*deltacn + 4000.0*(1.0/ppm)
  end

  # assumes its already chomped
  # updates the 5 globals
  def prep_reply(reply, base)
    if reply == 'q' ; exit ; end
    if reply =~ /^\s*$/
      base
    elsif reply
      arr = reply.split(/\s+/)
      to_change = []
      to_change_hash = {}
      arr.each do |it|
        if it.include? ':'
          (k,v) = it.split(':')
          to_change_hash[k] = v
        else
          to_change << it
        end
      end
      to_change.each_with_index do |tc,i|
        begin
          base[i] = tc.to_f
        rescue NoMethodError
          out "BAD ARG: #{tc}"
          return false
        end
      end
      to_change_hash.each do |k,v|
        case k
        when 'x1' ; base[0] = v
        when 'x2' ; base[1] = v
        when 'x3' ; base[2] = v
        when 'dcn' ; base[3] = v
        when 'ppm' ; base[4] = v
        else
          out "BAD ARG: #{k}:#{v}"
        end
      end
      base.map {|v| v.to_f }
    else
      false
    end
  end

  def file_to_prefiltered_spec_id(file, opt)
    spec_id = nil
    marshal_file = file + ".prefiltered.msh"
    if File.exist?(marshal_file)
      File.open(marshal_file) do |fh|
        spec_id = Marshal.load(fh)
      end
    else
      spec_id = SpecID.new(file)
      spec_id.passed_in_filename = file
      spec_id.top_peps_prefilter!
      ## marshal it!
      if opt.marshal
        File.open(marshal_file, "w") do |fh|
          Marshal.dump(spec_id,fh)
        end
      end
    end
    spec_id
  end

  def interactive_help
    string = []
    string << "********************************************************"
    string << "INTERACTIVE FILTERING HELP:"
    string << "enter: <x1> <x2> <x3> <dcn> <ppm>"
    string << "or   : x1:<x1> x2:<x2> x3:<x3> dcn:<dcn> ppm:<ppm>"
    string << "or   : dcn:<dcn>"
    string << "or   : <x1> <x2> ppm:<ppm>"
    string << "etc..."
    string << "<enter> to (re)run current values"
    string << "'q' to quit"
    string << "********************************************************"
    string.join("\n")
  end


end