# = Operations along a dimension such as running mean # # (Plan: binning will be supported too) require "numru/gphys/gphys" require 'numru/gphys_ext' # extension library module NumRu class GPhys BC_SIMPLE = 10 # enum in convol_filter.c BC_CYCLIC = 11 # enum in convol_filter.c BC_TRIM = 12 # enum in convol_filter.c @@default_missval = 9.9692099683868690e+36 # NC_FILL_DOUBLE/FLOAT ~15*2^119 # Running mean along a dimension (with optional weight specification). # # ARGUMENTS # * dim (Integer or String) : the dimension # * len_or_wgt : If Integer, specifies the length; if 1D NArray, # specifies the weight (e.g., NArray[1.0, 2.0, 1.0] for # the 1-2-1 smooting) # * bc (Integer; optional) : Specify one of the following: # * nil (default) : choose BC_SIMPLE or BC_CYCLIC automatically # * GPhys::BC_SIMPLE : Averaging is truncated at the boundaries # (the number of grid points used is reduced near the boundaries). # The shape of the object is conserved. # * GPhys::BC_CYCLIC : Cyclic boundary condition. Shape conserved. # * GPhys::BC_TRIM : Grids near the boundaries are trimmed to # secure the number of grid points to average. # Shape not conserved; length along the dim is reduced by (len-1). # * nminvalid (Integer; optional; default=1): # This parameter is used only when the data have missing. # Minimum number of grid points needed for averaging. # Must be from 1 to len. # # RETURN VALUE # * a GPhys # # REMARK AND LIMITATION # * If the length of the running mean is even number, fewer # grid points are used from the "left" side; e.g., # If len is 6, result[2] is a mean over self[0]..self[5]. # * Regardless the na_type of self, double is used for averaging, so: # * This method does not support complex numbers. # def running_mean(dim, len_or_wgt=nil, bc=nil, nminvalid=1) #< process arguments > dim = dim_index(dim) # to handle String or negative specification if bc.nil? if axis(dim).cyclic_extendible? bc=BC_CYCLIC else bc=BC_SIMPLE end end case len_or_wgt when nil raise ArgumentError, "You need to specify the length (Integer) or the weight (1D NArray) as the 2nd argument" when Integer # len_or_wgt is a length len = len_or_wgt wgt = NArray.float(len).fill!(1.0) else # len_or_wgt is a weight wgt = len_or_wgt if (!wgt.respond_to?(:rank) || wgt.rank != 1) raise ArgumentError, "wgt: expect a 1D NArray(-like obj)" end len = wgt.length end #< calc running mean > vi = self.val if (vi.typecode > NArray::DFLOAT) raise("This method supports only real or integer data") end if vi.is_a?(NArrayMiss) vi, missval = nam2na_missval(vi) vo = c_running_mean(vi,dim,wgt,bc,missval,nminvalid) vo = NArrayMiss.to_nam(vo, vo.ne(missval) ) else vo = c_running_mean(vi,dim,wgt,bc) end #< grid > if (bc == BC_TRIM) fst = (len-1)/2 # if odd len/2, if even len/2-1 lst = -(len/2) - 1 grid = self.grid[ *([true]*dim + [fst..lst, false]) ] else grid = self.grid end #< result > vvo = VArray.new( vo, self.data, self.name ) # Inherit name & attrs GPhys.new( grid, vvo ) end # Running mean along two dimensions (with optional weight specification). # # This method would be needed for data with missing. If the data # has no missing, you should apply running_mean consecutively # along the two dimensions you want, which will be faster and efficient. # # However, if the data have missing, the results are not the same. # That's why this method is needed. # # ARGUMENTS # * dim1 (Integer or String) : a dimension to apply the running mean # * dim2 (Integer or String) : another dimension to apply the running mean # (dim1 must be smaller than dim2) # * len_or_wgt1 : If Integer, specifies the length of running mean for dim1. # If 1D NArray, it specifies the weight (e.g., NArray[1.0, 2.0, 1.0] for # the 1-2-1 smoothing) # * len_or_wgt2 : as len_or_wgt1 but for dim2. # * bc1 (Integer or nil; optional) : Specify one of the following for dim1: # * nil : choose BC_SIMPLE or BC_CYCLIC automatically # * GPhys::BC_SIMPLE : Averaging is truncated at the boundaries # (the number of grid points used is reduced near the boundaries). # The shape of the object is conserved. # * GPhys::BC_CYCLIC : Cyclic boundary condition. Shape conserved. # * bc2 : as bc1 but for dim2. # * nminvalid (Integer; optional; default=1): # Minimum number of grid points needed to define the running mean # (it is the number of valid data in the 2-dimensional tile). # def running_mean_2D(dim1, dim2, len_or_wgt1, len_or_wgt2, bc1=nil, bc2=nil, nminvalid=1) dim1 = dim_index(dim1) # to handle String or negative specification dim2 = dim_index(dim2) # to handle String or negative specification if bc1.nil? if axis(dim1).cyclic_extendible? bc1=BC_CYCLIC else bc1=BC_SIMPLE end end if bc2.nil? if axis(dim2).cyclic_extendible? bc2=BC_CYCLIC else bc2=BC_SIMPLE end end vi = self.val if vi.is_a?(NArray) # Just for convenience. self.running_mean(dim1, len_or_wgt1, bc1, nminvalid).running_mean(dim2, len_or_wgt2, bc2, nminvalid) end if (vi.typecode > NArray::DFLOAT) raise("This method supports only real or integer data") end if vi.is_a?(NArrayMiss) vi, missval = nam2na_missval(vi) vo = c_running_mean_2D(vi, dim1,len_or_wgt1,bc1, dim2,len_or_wgt2,bc2, missval, nminvalid) vo = NArrayMiss.to_nam(vo, vo.ne(missval) ) else raise("This method accepts only data with missing. If not, you should apply running_mean twice for dim1 & dim2, respectively") end vvo = VArray.new( vo, self.data, self.name ) # Inherit name & attrs GPhys.new( self.grid, vvo ) end # Binning along a dimension (mean) # # The values are averaged every "len" grids; unlike running_mean # the number of grids is reduced to 1/len. # Currently, the only available boundary condition is BC_TRIM. # # ARGUMENTS # * dim (Integer or String) : the dimension # * len (Integer): length of the bin # * nminvalid (Integer; optional; defualt=1): Effective only for data with # missing. Minimum number of grid points needed for averaging (1 to len). # # RETURN VALUE # * a GPhys # def bin_mean(dim, len, nminvalid=1) dim = dim_index(dim) # to handle String or negative specification GPhys.new( grid.binning(dim, len), data.bin_mean(dim, len, nminvalid) ) end # Binning along a dimension (summation) # # Similar to bin_mean, but the values are simply summed without averaging # # ARGUMENTS # * dim (Integer or String) : the dimension # * len (Integer): length of the bin # * nminvalid (Integer; optional; defualt=1): Effective only for data with # missing. Minimum number of grid points needed for averaging (1 to len). # # RETURN VALUE # * a GPhys # def bin_sum(dim, len, nminvalid=1) dim = dim_index(dim) # to handle String or negative specification GPhys.new( grid.binning(dim, len), data.bin_sum(dim, len, nminvalid) ) end ##### private ######### def nam2na_missval(nam) missval = ( (a=get_att('_FillValue')) ? a[0] : nil ) || ( (a=get_att('missing_value')) ? a[0] : nil ) || @@default_missval [ nam.to_na(missval), missval ] end private :nam2na_missval #def convolution(dim, g=nil, bc=BC_SIMPLE) #end end class VArray @@default_missval = 9.9692099683868690e+36 # NC_FILL_DOUBLE/FLOAT ~15*2^119 def bin_mean(dim, len, nminvalid=1) vi = self.val if vi.is_a?(NArrayMiss) vi, missval = nam2na_missval(vi) vo = c_bin_mean(vi,dim,len,missval,nminvalid) # defined in dim_op.c vo = NArrayMiss.to_nam(vo, vo.ne(missval) ) else vo = c_bin_mean(vi,dim,len) # defined in dim_op.c end VArray.new( vo, self, self.name ) # Inherit name & attrs end def bin_sum(dim, len, nminvalid=1) vi = self.val if vi.is_a?(NArrayMiss) vi, missval = nam2na_missval(vi) vo = c_bin_sum(vi,dim,len,missval,nminvalid) # defined in dim_op.c vo = NArrayMiss.to_nam(vo, vo.ne(missval) ) else vo = c_bin_sum(vi,dim,len) # defined in dim_op.c end VArray.new( vo, self, self.name ) # Inherit name & attrs end ##### private ######### def nam2na_missval(nam) missval = ( (a=get_att('_FillValue')) ? a[0] : nil ) || ( (a=get_att('missing_value')) ? a[0] : nil ) || @@default_missval [ nam.to_na(missval), missval ] end private :nam2na_missval end class Grid def binning(dim, len) dim = dim_index(dim) # to handle String or negative specification #< axes > axes = (0...rank).collect{|d| axis(d)} axes[dim] = axes[dim].binning(len) # bin the dim-th axis newgrid = Grid.new(*axes) #< assoc coords are retained if it does not have the dim > if assoc_coords dimname = self.axis(dim).name acs = Array.new assoccoordnames.each do |nm| ac = assoc_coord_gphys(nm) acs.push(ac) if !ac.axnames.include?(dimname) end newgrid.set_assoc_coords(acs) if acs.length > 0 end #< return > newgrid end end class Axis def binning(len) if cell? && cell_center? c = cell_center.bin_mean(0,len) b = cell_bounds[ {0..-1=>len} ] newax = Axis.new(true).set_cell(c,b) newax = axcel.dup.set_pos_to_center else newax = Axis.new().set_pos( pos.bin_mean(0,len) ) end newax end end end ###################################################### ## < test > if $0 == __FILE__ require 'test/unit' class TC_GPhys_grid_op < Test::Unit::TestCase include NumRu def setup nx = 11 ny = 5 vx = VArray.new( NArray.float(nx).indgen! ).rename("x") vy = VArray.new( NArray.float(ny).indgen! ).rename("y") xax = Axis.new().set_pos(vx) yax = Axis.new().set_pos(vy) grid = Grid.new(xax, yax) z = VArray.new( na = NArray.float(nx, ny).indgen! % 5, nil, "z" ) zu = VArray.new( NArray.float(nx, ny).fill(3.0), nil, "z" ) @gp = GPhys.new(grid,z) @gpu = GPhys.new(grid,zu) nam = NArrayMiss.to_nam(na) nam.invalidation(1,1) nam.invalidation(2,2) (1..4).each{|i| nam.invalidation(i,3)} zm = VArray.new( nam, nil, "zm" ) @gpm = GPhys.new(grid,zm) end def test_0_0_running_mean puts "*** running_mean: basic ***" p(v0 = @gp.val) p(v = @gp.running_mean(0,3).val) assert_in_delta(v[4,0], 2.33333, 1e-4) p(v = @gp.running_mean(1,3,GPhys::BC_TRIM).val) assert_in_delta(v[2,1], 2.33333, 1e-4) p(v = @gpu.running_mean(0,3,GPhys::BC_TRIM).val) assert_in_delta(v[0,0], 3.0, 1e-4) assert_equal(v.shape[0], v0.shape[0]-2) p(v = @gp[0..9,true].running_mean(0,5,GPhys::BC_CYCLIC).val) assert_in_delta(v[0,0], 2.0, 1e-4) end def test_0_1_running_mean puts "*** running_mean: weight ***" wgt = NArray[1.0, 2.0, 1.0] p @gp.val p(v = @gp.running_mean(0,wgt).val) assert_in_delta(v[4,0], (3.0+4.0*2)/4, 1e-4) end def test_0_2_running_mean puts "*** running_mean: with data missing ***" p @gpm.val p(v = @gpm.running_mean(0,3).val) assert_in_delta(v[2,2], 1.5, 1e-4) assert(v[2,3] > 1e30) # missing value end def test_1_0_bin_mean puts "*** bin_mean ***" p @gp.val b = @gp.bin_mean(0,3) v = b.val p v, b.coord(0).val, b.coord(1).val assert_in_delta(b.val[1,1], 5.0/3.0, 1e-4) assert_in_delta(b.coord(0).val[1], 4.0, 1e-4) end def test_1_1_bin_mean puts "*** bin_mean: with data missing ***" p @gpm.val b = @gpm.bin_mean(0,3,2) v = b.val p v, b.coord(0).val, b.coord(1).val assert_in_delta(v[0,2], 2.5, 1e-4) assert(v[0,3] > 1e30) # missing value end def test_2_0_bin_sum puts "*** bin_sum ***" p @gp.val b = @gp.bin_sum(0,3) v = b.val p v, b.coord(0).val, b.coord(1).val assert_in_delta(b.val[1,1], 5.0, 1e-4) end def test_2_1_bin_sum puts "*** bin_sum: with data missing ***" p @gpm.val b = @gpm.bin_sum(0,3,2) v = b.val p v, b.coord(0).val, b.coord(1).val assert_in_delta(v[0,2], 5.0, 1e-4) assert(v[0,3] > 1e30) # missing value end end end