require 'mspire/bin'

module Mspire
  # a collection of peak objects
  class PeakList < Array

    def lo_x
      self.first[0]
    end

    def hi_x
      self.last[0]
    end

    DEFAULT_MERGE = {
      :bin_width => 5,
      :bin_unit => :ppm,
      :normalize => true,
      :return_data => false,
      :split => :share,
      :centroided => true,
    }

    class << self

      def create_bins(peaklists, opts)
        min, max = min_max_mz(peaklists)

        divisions = []
        bin_width = opts[:bin_width]
        use_ppm = (opts[:bin_unit] == :ppm)
        current_mz = min
        loop do
          if current_mz >= max
            divisions << max
            break
          else
            divisions << current_mz
            current_mz += ( use_ppm ? current_mz./(1e6).*(bin_width) : bin_width )
          end
        end
        # make each bin exclusive so there is no overlap
        bins = divisions.each_cons(2).map {|pair| Mspire::Bin.new(*pair, true) }
        # make the last bin *inclusive* of the terminating value
        bins[-1] = Mspire::Bin.new(bins.last.begin, bins.last.end)
        bins
      end

      def min_max_mz(peaklists)
        # find the min and max across all spectra
        first_peaklist = peaklists.first
        min = first_peaklist.first[0]; max = first_peaklist.last[0]
        peaklists.each do |peaklist|
          min = peaklist.lo_x if peaklist.lo_x < min
          max = peaklist.hi_x if peaklist.hi_x > max
        end
        [min, max]
      end

      def merge_centroids(peaklists, opts={})

        # Create Mspire::Bin objects
        bins = opts[:bins] ? opts[:bins] : create_bins(peaklists, opts)

        peaklists.each do |peaklist|
          Mspire::Bin.bin(bins, peaklist, &:first)
        end

        pseudo_peaks = bins.map do |bin|
          [bin, bin.data.reduce(0.0) {|sum,peak| sum + peak[1] }]
        end

        pseudo_peaklist = Mspire::PeakList.new(pseudo_peaks)

        peak_lists = pseudo_peaklist.split(opts[:split])

        return_data = []
        final_peaklist = []
        peak_lists.each_with_index do |peak_list,i|
          #peaks.each do |peak|
          tot_intensity = peak_list.map(&:last).reduce(:+)
          return_data_per_peak = [] if opts[:return_data]
          weighted_mz = 0.0
          peak_list.each do |peak|
            pre_scaled_intensity = peak[0].data.reduce(0.0) {|sum,v| sum + v.last }
            post_scaled_intensity = peak[1]
            # some peaks may have been shared.  In this case the intensity
            # for that peak was downweighted.  However, the actual data
            # composing that peak is not altered when the intensity is
            # shared.  So, to calculate a proper weighted avg we need to
            # downweight the intensity of any data point found within a bin
            # whose intensity was scaled.
            correction_factor = 
              if pre_scaled_intensity != post_scaled_intensity
                post_scaled_intensity / pre_scaled_intensity
              else
                1.0
              end

            return_data_per_peak.push(*peak[0].data) if opts[:return_data]

            peak[0].data.each do |lil_point|
              weighted_mz += lil_point[0] * ( (lil_point[1].to_f * correction_factor) / tot_intensity)
            end
          end
          return_data << return_data_per_peak if opts[:return_data]
          final_peaklist << Mspire::Peak.new([weighted_mz, tot_intensity])
        end
        [final_peaklist, return_data]
      end

      # returns a new peak_list which has been merged with the others. 
      # opts[:resolution]) and then segment according to monotonicity (sharing
      # intensity between abutting points).  The  final m/z is the weighted
      # averaged of all the m/z's in each peak.  Valid opts (with default listed
      # first):
      #
      #     :bin_width => 5 
      #     :bin_unit => :ppm | :amu        interpret bin_width as ppm or amu
      #     :bins => array of Mspire::Bin objects  for custom bins (overides other bin options)
      #     :normalize => true              if true, divides total intensity by 
      #                                     number of spectra
      #     :return_data => false           returns a parallel array containing
      #                                     the peaks associated with each returned peak
      #     :split => false | :share | :greedy_y   see Mspire::Peak#split
      #     :centroided => true             treat the data as centroided
      #
      # The binning algorithm is roughly the fastest possible algorithm that
      # would allow for arbitrary, non-constant bin widths (a ratcheting
      # algorithm O(n + m))
      #
      # Assumes the peaklists are already sorted by m/z.
      def merge(peaklists, opts={})
        opts = DEFAULT_MERGE.merge(opts)

        (peaklist, returned_data) =  
          if opts[:centroided]
            merge_centroids(peaklists, opts)
          else
            raise NotImplementedError, "need to implement profile merging"
          end

        if opts[:normalize]
          sz = peaklists.size
          peaklist.each {|peak| peak[1] = peak[1].to_f / sz }
        end
        if opts[:return_data]
          $stderr.puts "returning peaklist (#{peaklist.size}) and data" if $VERBOSE
          [peaklist, returned_data]
        else
          $stderr.puts "returning peaklist (#{peaklist.size})" if $VERBOSE
          peaklist 
        end
      end
    end


    # returns an Array of peaklist objects.  Splits run of 1 or more local
    # minima into multiple peaklists.  When a point is 'shared' between two
    # adjacent hill-ish areas, the choice of how to resolve multi-hills (runs
    # of data above zero) is one of:
    #
    #     false/nil => only split on zeros
    #     :share => give each peak its rightful portion of shared peaks, dividing the
    #               intensity based on the intensity of adjacent peaks
    #     :greedy_y => give the point to the peak with highest point next to
    #                  the point in question. tie goes lower.
    #
    # if return_local_minima is true, a parallel array of local minima indices is
    # returned (only makes sense if split_multipeaks is false)
    # 
    # assumes that a new peak can be made with an array containing the x
    # value and the y value.
    def split(split_multipeaks=false, return_local_minima=false)
      if split_multipeaks
        (zeroed_peaks, local_min_ind_ar) = self.split(false, true)
        $stderr.print "splitting on local minima ..." if $VERBOSE
        no_local_minima_peaks = zeroed_peaks.zip(local_min_ind_ar).map do |peak, lm_indices|
          new_peaks = [ peak.class.new ]
          if lm_indices.size > 0
            prev_lm_i = -1   # <- it's okay, we don't use until it is zero
            lm_indices.each do |lm_i|
              lm = peak[lm_i]
              point_class = lm.class

              # push onto the last peak all the points from right after the previous local min
              # to just before this local min
              new_peaks.last.push( *peak[(prev_lm_i+1)..(lm_i-1)] )
              before_pnt = peak[lm_i-1]
              after_pnt = peak[lm_i+1]

              case split_multipeaks
              when :share
                sum = before_pnt[1] + after_pnt[1]
                # push onto the last peak its portion of the local min
                new_peaks.last << point_class.new( [lm[0], lm[1] * (before_pnt[1].to_f/sum)] )
                # create a new peak that contains its portion of the local min
                new_peaks << self.class.new( [point_class.new([lm[0], lm[1] * (after_pnt[1].to_f/sum)])] )
                prev_lm_i = lm_i
              when :greedy_y
                if before_pnt[1] >= after_pnt[1]
                  new_peaks.last << lm
                  new_peaks << self.class.new
                  prev_lm_i = lm_i
                else
                  new_peaks << self.class.new( [lm] )
                  prev_lm_i = lm_i
                end
              else
                raise ArgumentError, "only recognize :share, :greedy_y, or false for the arg in #split(arg)"
              end
            end
            new_peaks.last.push( *peak[(prev_lm_i+1)...peak.size] )
            new_peaks
          else
            [peak]
          end
        end.flatten(1) # end zip
        $stderr.puts "now #{no_local_minima_peaks.size} peaks." if $VERBOSE
        no_local_minima_peaks 
      else
        $stderr.print "splitting on zeros..." if $VERBOSE
        # first, split the peaks based on zero intensity values 
        # and simultaneously keep track of the local minima within each
        # resulting peak
        peak_lists = []
        local_min_ind_ar = []
        in_peak = false
        self.each_with_index do |peak, index|
          previous_y = self[index - 1][1]
          if peak[1] > 0
            if !in_peak
              in_peak = 0
              peak_lists << self.class.new([peak])
              local_min_ind_ar << []
            else
              peak_lists.last << peak
              # if on_upslope(previous_y, point[1])
              if previous_y < peak[1]
                # If we were previously on a downslope and we are now on an upslope
                # then the previous index is a local min
                prev_previous_y = self[index - 2][1]
                # on_downslope(prev_previous_y, previous_y)
                if prev_previous_y > previous_y
                  # We have found a local min
                  local_min_ind_ar.last << (in_peak-1)
                end
              end # end if (upslope)
            end # end if !in_peak
            in_peak += 1
          elsif in_peak
            in_peak = false
          end # end if point[1] > 0
        end
        $stderr.puts "#{peak_lists.size} no-whitespace-inside peak_lists." if $VERBOSE
        return_local_minima ? [peak_lists, local_min_ind_ar] : peak_lists
      end # 
    end # def split
  end
end