# = Operations along a dimension such as running mean
#
# (Plan: binning will be supported too)

require "numru/gphys/gphys"
require 'numru/gphys_ext'  # extension library

module NumRu
  class GPhys
    BC_SIMPLE = 10   # enum in convol_filter.c
    BC_CYCLIC = 11   # enum in convol_filter.c
    BC_TRIM   = 12   # enum in convol_filter.c

    @@default_missval = 9.9692099683868690e+36 # NC_FILL_DOUBLE/FLOAT ~15*2^119

    # Running mean along a dimension (with optional weight specification).
    # 
    # ARGUMENTS
    # * dim (Integer or String) : the dimension
    # * len_or_wgt : If Integer, specifies the length; if 1D NArray,
    #   specifies the weight (e.g., NArray[1.0, 2.0, 1.0] for
    #   the 1-2-1 smooting)
    # * bc (Integer; optional) : Specify one of the following:
    #   * nil (default) : choose BC_SIMPLE or BC_CYCLIC automatically
    #   * GPhys::BC_SIMPLE : Averaging is truncated at the boundaries
    #     (the number of grid points used is reduced near the boundaries).
    #     The shape of the object is conserved.
    #   * GPhys::BC_CYCLIC : Cyclic boundary condition. Shape conserved.
    #   * GPhys::BC_TRIM : Grids near the boundaries are trimmed to
    #     secure the number of grid points to average.
    #     Shape not conserved; length along the dim is reduced by (len-1).
    # * nminvalid (Integer; optional; default=1): 
    #   This parameter is used only when the data have missing.
    #   Minimum number of grid points needed for averaging.
    #   Must be from 1 to len.
    # 
    # RETURN VALUE
    # * a GPhys
    # 
    # REMARK AND LIMITATION
    # * If the length of the running mean is even number, fewer
    #   grid points are used from the "left" side; e.g.,
    #   If len is 6, result[2] is a mean over self[0]..self[5].
    # * Regardless the na_type of self, double is used for averaging, so:
    # * This method does not support complex numbers.
    # 
    def running_mean(dim, len_or_wgt=nil, bc=nil, nminvalid=1)

      #< process arguments >

      dim = dim_index(dim)  # to handle String or negative specification

      if bc.nil?
        if axis(dim).cyclic_extendible?
          bc=BC_CYCLIC
        else
          bc=BC_SIMPLE
        end
      end

      case len_or_wgt
      when nil
        raise ArgumentError, "You need to specify the length (Integer) or the weight (1D NArray) as the 2nd argument"
      when Integer
        # len_or_wgt is a length
        len = len_or_wgt  
        wgt = NArray.float(len).fill!(1.0)
      else
        # len_or_wgt is a weight
        wgt = len_or_wgt
        if (!wgt.respond_to?(:rank) || wgt.rank != 1)
          raise ArgumentError, "wgt: expect a 1D NArray(-like obj)"
        end
        len = wgt.length
      end

      #< calc running mean >

      vi = self.val
      if (vi.typecode > NArray::DFLOAT)
        raise("This method supports only real or integer data")
      end 
      if vi.is_a?(NArrayMiss)
        vi, missval = nam2na_missval(vi)
        vo = c_running_mean(vi,dim,wgt,bc,missval,nminvalid)
        vo = NArrayMiss.to_nam(vo, vo.ne(missval) )
      else
        vo = c_running_mean(vi,dim,wgt,bc)
      end

      #< grid >

      if (bc ==  BC_TRIM)
        fst = (len-1)/2    # if odd len/2, if even len/2-1
        lst = -(len/2) - 1 
        grid = self.grid[ *([true]*dim + [fst..lst, false]) ]
      else
        grid = self.grid
      end

      #< result >
      vvo = VArray.new( vo, self.data, self.name )  # Inherit name & attrs
      GPhys.new( grid, vvo )

    end

    # Running mean along two dimensions (with optional weight specification).
    #
    # This method would be needed for data with missing. If the data
    # has no missing, you should apply running_mean consecutively
    # along the two dimensions you want, which will be faster and efficient.
    # 
    # However, if the data have missing, the results are not the same.
    # That's why this method is needed.
    # 
    # ARGUMENTS
    # * dim1 (Integer or String) : a dimension to apply the running mean
    # * dim2 (Integer or String) : another dimension to apply the running mean
    #   (dim1 must be smaller than dim2)
    # * len_or_wgt1 : If Integer, specifies the length of running mean for dim1.
    #   If 1D NArray, it specifies the weight (e.g., NArray[1.0, 2.0, 1.0] for
    #   the 1-2-1 smoothing)
    # * len_or_wgt2 : as len_or_wgt1 but for dim2.
    # * bc1 (Integer or nil; optional) : Specify one of the following for dim1:
    #   * nil : choose BC_SIMPLE or BC_CYCLIC automatically
    #   * GPhys::BC_SIMPLE : Averaging is truncated at the boundaries
    #     (the number of grid points used is reduced near the boundaries).
    #     The shape of the object is conserved.
    #   * GPhys::BC_CYCLIC : Cyclic boundary condition. Shape conserved.
    # * bc2 : as bc1 but for dim2.
    # * nminvalid (Integer; optional; default=1): 
    #   Minimum number of grid points needed to define the running mean
    #   (it is the number of valid data in the 2-dimensional tile).
    #
    def running_mean_2D(dim1, dim2, len_or_wgt1, len_or_wgt2,
                        bc1=nil, bc2=nil,
                        nminvalid=1)

      dim1 = dim_index(dim1)  # to handle String or negative specification
      dim2 = dim_index(dim2)  # to handle String or negative specification

      if bc1.nil?
        if axis(dim1).cyclic_extendible?
          bc1=BC_CYCLIC
        else
          bc1=BC_SIMPLE
        end
      end
      if bc2.nil?
        if axis(dim2).cyclic_extendible?
          bc2=BC_CYCLIC
        else
          bc2=BC_SIMPLE
        end
      end

      vi = self.val
      if vi.is_a?(NArray)
        # Just for convenience.
        self.running_mean(dim1, len_or_wgt1, bc1, nminvalid).running_mean(dim2, len_or_wgt2, bc2, nminvalid)
      end
      
      if (vi.typecode > NArray::DFLOAT)
        raise("This method supports only real or integer data")
      end 
      if vi.is_a?(NArrayMiss)
        vi, missval = nam2na_missval(vi)
        vo = c_running_mean_2D(vi, dim1,len_or_wgt1,bc1, dim2,len_or_wgt2,bc2,
                missval, nminvalid)
        vo = NArrayMiss.to_nam(vo, vo.ne(missval) )
      else
        raise("This method accepts only data with missing. If not, you should apply running_mean twice for dim1 & dim2, respectively")
      end

      vvo = VArray.new( vo, self.data, self.name )  # Inherit name & attrs
      GPhys.new( self.grid, vvo )
    end

    # Binning along a dimension (mean)
    # 
    # The values are averaged every "len" grids; unlike running_mean
    # the number of grids is reduced to 1/len. 
    # Currently, the only available boundary condition is BC_TRIM.
    # 
    # ARGUMENTS
    # * dim (Integer or String) : the dimension
    # * len (Integer): length of the bin
    # * nminvalid (Integer; optional; defualt=1): Effective only for data with
    #   missing. Minimum number of grid points needed for averaging (1 to len).
    # 
    # RETURN VALUE
    # * a GPhys
    # 
    def bin_mean(dim, len, nminvalid=1)
      dim = dim_index(dim)  # to handle String or negative specification
      GPhys.new( grid.binning(dim, len), data.bin_mean(dim, len, nminvalid) )
    end

    # Binning along a dimension (summation)
    # 
    # Similar to bin_mean, but the values are simply summed without averaging
    # 
    # ARGUMENTS
    # * dim (Integer or String) : the dimension
    # * len (Integer): length of the bin
    # * nminvalid (Integer; optional; defualt=1): Effective only for data with
    #   missing. Minimum number of grid points needed for averaging (1 to len).
    # 
    # RETURN VALUE
    # * a GPhys
    # 
    def bin_sum(dim, len, nminvalid=1)
      dim = dim_index(dim)  # to handle String or negative specification
      GPhys.new( grid.binning(dim, len), data.bin_sum(dim, len, nminvalid) ) 
    end

    ##### private #########
    def nam2na_missval(nam)
      missval = ( (a=get_att('_FillValue')) ? a[0] : nil ) || 
                ( (a=get_att('missing_value')) ? a[0] : nil ) || 
                @@default_missval
      [ nam.to_na(missval), missval ]
    end
    private :nam2na_missval

    #def convolution(dim, g=nil, bc=BC_SIMPLE)
    #end
  end

  class VArray
    @@default_missval = 9.9692099683868690e+36 # NC_FILL_DOUBLE/FLOAT ~15*2^119

    def bin_mean(dim, len, nminvalid=1)
      vi = self.val
      if vi.is_a?(NArrayMiss)
        vi, missval = nam2na_missval(vi)
        vo = c_bin_mean(vi,dim,len,missval,nminvalid)  # defined in dim_op.c
        vo = NArrayMiss.to_nam(vo, vo.ne(missval) )
      else
        vo = c_bin_mean(vi,dim,len)  # defined in dim_op.c
      end
      VArray.new( vo, self, self.name )  # Inherit name & attrs 
    end

    def bin_sum(dim, len, nminvalid=1)
      vi = self.val
      if vi.is_a?(NArrayMiss)
        vi, missval = nam2na_missval(vi)
        vo = c_bin_sum(vi,dim,len,missval,nminvalid)  # defined in dim_op.c
        vo = NArrayMiss.to_nam(vo, vo.ne(missval) )
      else
        vo = c_bin_sum(vi,dim,len)  # defined in dim_op.c
      end
      VArray.new( vo, self, self.name )  # Inherit name & attrs 
    end

    ##### private #########
    def nam2na_missval(nam)
      missval = ( (a=get_att('_FillValue')) ? a[0] : nil ) || 
                ( (a=get_att('missing_value')) ? a[0] : nil ) || 
                @@default_missval
      [ nam.to_na(missval), missval ]
    end
    private :nam2na_missval

  end

  class Grid
    def binning(dim, len)
      dim = dim_index(dim)  # to handle String or negative specification

      #< axes >
      axes = (0...rank).collect{|d| axis(d)}
      axes[dim] = axes[dim].binning(len)  # bin the dim-th axis
      newgrid = Grid.new(*axes)

      #< assoc coords are retained if it does not have the dim >
      if assoc_coords
        dimname = self.axis(dim).name
        acs = Array.new
        assoccoordnames.each do |nm|
          ac = assoc_coord_gphys(nm)
          acs.push(ac) if !ac.axnames.include?(dimname)
        end
        newgrid.set_assoc_coords(acs) if acs.length > 0
      end

      #< return >
      newgrid
    end
  end

  class Axis
    def binning(len)
      if cell? && cell_center?
        c = cell_center.bin_mean(0,len)
        b = cell_bounds[ {0..-1=>len} ]
        newax = Axis.new(true).set_cell(c,b)
        newax = axcel.dup.set_pos_to_center
      else
        newax = Axis.new().set_pos( pos.bin_mean(0,len) )
      end
      newax  
    end
  end
end

######################################################
## < test >
if $0 == __FILE__
  require 'test/unit'

  class TC_GPhys_grid_op < Test::Unit::TestCase
    include NumRu
    def setup
      nx = 11
      ny = 5
      vx = VArray.new( NArray.float(nx).indgen! ).rename("x")
      vy = VArray.new( NArray.float(ny).indgen! ).rename("y")
      xax = Axis.new().set_pos(vx)
      yax = Axis.new().set_pos(vy)
      grid = Grid.new(xax, yax)
      z = VArray.new( na = NArray.float(nx, ny).indgen! % 5, nil, "z" )
      zu = VArray.new( NArray.float(nx, ny).fill(3.0), nil, "z" )

      @gp = GPhys.new(grid,z)
      @gpu = GPhys.new(grid,zu)

      nam = NArrayMiss.to_nam(na)
      nam.invalidation(1,1)
      nam.invalidation(2,2)
      (1..4).each{|i| nam.invalidation(i,3)}
      zm = VArray.new( nam, nil, "zm" )
      @gpm = GPhys.new(grid,zm)

    end

    def test_0_0_running_mean
      puts "*** running_mean: basic ***"
      p(v0 = @gp.val)

      p(v = @gp.running_mean(0,3).val)
      assert_in_delta(v[4,0], 2.33333, 1e-4)

      p(v = @gp.running_mean(1,3,GPhys::BC_TRIM).val)
      assert_in_delta(v[2,1], 2.33333, 1e-4)

      p(v = @gpu.running_mean(0,3,GPhys::BC_TRIM).val)
      assert_in_delta(v[0,0], 3.0, 1e-4)
      assert_equal(v.shape[0], v0.shape[0]-2)

      p(v = @gp[0..9,true].running_mean(0,5,GPhys::BC_CYCLIC).val)
      assert_in_delta(v[0,0], 2.0, 1e-4)
    end

    def test_0_1_running_mean
      puts "*** running_mean: weight ***"
      wgt = NArray[1.0, 2.0, 1.0]
      p @gp.val
      p(v = @gp.running_mean(0,wgt).val)
      assert_in_delta(v[4,0], (3.0+4.0*2)/4, 1e-4)
    end

    def test_0_2_running_mean
      puts "*** running_mean: with data missing ***"
      p @gpm.val
      p(v = @gpm.running_mean(0,3).val)
      assert_in_delta(v[2,2], 1.5, 1e-4)
      assert(v[2,3] > 1e30)  # missing value
    end

    def test_1_0_bin_mean
      puts "*** bin_mean ***"
      p @gp.val
      b = @gp.bin_mean(0,3)
      v = b.val
      p v, b.coord(0).val, b.coord(1).val
      assert_in_delta(b.val[1,1], 5.0/3.0, 1e-4)
      assert_in_delta(b.coord(0).val[1], 4.0, 1e-4)
    end

    def test_1_1_bin_mean
      puts "*** bin_mean: with data missing ***"
      p @gpm.val
      b = @gpm.bin_mean(0,3,2)
      v = b.val
      p v, b.coord(0).val, b.coord(1).val
      assert_in_delta(v[0,2], 2.5, 1e-4)
      assert(v[0,3] > 1e30)  # missing value
    end

    def test_2_0_bin_sum
      puts "*** bin_sum ***"
      p @gp.val
      b = @gp.bin_sum(0,3)
      v = b.val
      p v, b.coord(0).val, b.coord(1).val
      assert_in_delta(b.val[1,1], 5.0, 1e-4)
    end

    def test_2_1_bin_sum
      puts "*** bin_sum: with data missing ***"
      p @gpm.val
      b = @gpm.bin_sum(0,3,2)
      v = b.val
      p v, b.coord(0).val, b.coord(1).val
      assert_in_delta(v[0,2], 5.0, 1e-4)
      assert(v[0,3] > 1e30)  # missing value
    end

  end
end