module Daru
  module Maths
    module Statistics
      module DataFrame
        # @!method mean
        #   Calculate mean of numeric vectors
        # @!method variance_sample
        #   Calculate sample variance of numeric vectors
        # @!method range
        #   Calculate range of numeric vectors
        # @!method median
        #   Calculate median of numeric vectors
        # @!method mode
        #   Calculate mode of numeric vectors
        # @!method std
        #   Calculate sample standard deviation of numeric vectors
        # @!method sum
        #   Calculate sum of numeric vectors
        # @!method count
        #   Count the number of non-nil values in each vector
        # @!method min
        #   Calculate the minimum value of each numeric vector
        # @!method product
        #   Compute the product of each numeric vector
        [:mean, :variance_sample, :range, :median, :mode, :std, :sum, :count, :min, :product].each do |meth|
          define_method(meth) do
            compute_stats meth
          end
        end

        # Calculate the maximum value of each numeric vector.
        def max opts={}
          if opts[:vector]
            row[*self[opts[:vector]].max_index.index.to_a]
          else
            compute_stats :max
          end
        end

        # @!method cumsum
        #   Calculate cumulative sum of each numeric Vector
        # @!method standardize
        #   Standardize each Vector
        # @!method acf
        #   Calculate Autocorrelation coefficient
        #   @param [Integer] max_lags (nil) Number of initial lags
        # @!method ema
        #   Calculate exponential moving average.
        #   @param [Integer] n (10) Loopback length.
        #   @param [TrueClass, FalseClass, NilClass] wilder (false) If true,
        #     1/n value is  used for smoothing; if false, uses 2/(n+1) value.
        # @!method rolling_mean
        #   Calculate moving averages
        #   @param [Integer] n (10) Loopback length. Default to 10.
        # @!method rolling_median
        #   Calculate moving median
        #   @param [Integer] n (10) Loopback length. Default to 10.
        # @!method rolling_max
        #   Calculate moving max
        #   @param [Integer] n (10) Loopback length. Default to 10.
        # @!method rolling_min
        #   Calculate moving min
        #   @param [Integer] n (10) Loopback length. Default to 10.
        # @!method rolling_count
        #   Calculate moving non-missing count
        #   @param [Integer] n (10) Loopback length. Default to 10.
        # @!method rolling_std
        #   Calculate moving standard deviation
        #   @param [Integer] n (10) Loopback length. Default to 10.
        # @!method rolling_variance
        #   Calculate moving variance
        #   @param [Integer] n (10) Loopback length. Default to 10.
        [
          :cumsum,:standardize,:acf,:ema,:rolling_mean,:rolling_median,:rolling_max,
          :rolling_min,:rolling_count,:rolling_std,:rolling_variance, :rolling_sum
        ].each do |meth|
          define_method(meth) do |*args|
            apply_method_to_numerics meth, *args
          end
        end

        # Create a summary of mean, standard deviation, count, max and min of
        # each numeric vector in the dataframe in one shot.
        #
        # == Arguments
        #
        # +methods+ - An array with aggregation methods specified as symbols to
        # be applied to numeric vectors. Default is [:count, :mean, :std, :max,
        # :min]. Methods will be applied in the specified order.
        def describe methods=nil
          methods ||= [:count, :mean, :std, :min, :max]

          description_hash = {}
          numeric_vectors.each do |vec|
            description_hash[vec] = methods.map { |m| self[vec].send(m) }
          end
          Daru::DataFrame.new(description_hash, index: methods)
        end

        # The percent_change method computes the percent change over
        # the given number of periods for numeric vectors.
        #
        # @param [Integer] periods (1) number of nils to insert at the beginning.
        #
        # @example
        #
        #   df = Daru::DataFrame.new({
        #        'col0' => [1,2,3,4,5,6],
        #        'col2' => ['a','b','c','d','e','f'],
        #        'col1' => [11,22,33,44,55,66]
        #        },
        #        index: ['one', 'two', 'three', 'four', 'five', 'six'],
        #        order: ['col0', 'col1', 'col2'])
        #   df.percent_change
        #   #=>
        #   #   <Daru::DataFrame:23513280 @rows: 6 @cols: 2>
        #   #              col0                col1
        #   #   one
        #   #   two	   1.0	               1.0
        #   #   three	   0.5                 0.5
        #   #   four	   0.3333333333333333  0.3333333333333333
        #   #   five       0.25                0.25
        #   #   six        0.2                 0.2
        def percent_change periods=1
          df_numeric = only_numerics.vectors.to_a
          df = Daru::DataFrame.new({}, order: @order, index: @index, name: @name)
          df_numeric.each do |vec|
            df[vec] = self[vec].percent_change periods
          end
          df
        end

        # Calculate sample variance-covariance between the numeric vectors.
        def covariance
          cache = Hash.new do |h, (col, row)|
            h[[col, row]] = vector_cov(self[row],self[col])
          end
          vectors = numeric_vectors

          mat_rows = vectors.collect do |row|
            vectors.collect do |col|
              if row == col
                self[row].variance
              else
                cache[[col,row]]
              end
            end
          end

          Daru::DataFrame.rows(mat_rows, index: numeric_vectors, order: numeric_vectors)
        end

        alias :cov :covariance

        # Calculate the correlation between the numeric vectors.
        def correlation
          standard_deviation = std.to_matrix
          corr_arry = cov
                      .to_matrix
                      .elementwise_division(standard_deviation.transpose *
            standard_deviation).to_a

          Daru::DataFrame.rows(corr_arry, index: numeric_vectors, order: numeric_vectors)
        end

        alias :corr :correlation

        private

        def apply_method_to_numerics method, *args
          numerics = @vectors.to_a.map { |n| [n, @data[@vectors[n]]] }
                             .select { |_n, v| v.numeric? }
          computed = numerics.map { |_n, v| v.send(method, *args) }

          Daru::DataFrame.new(computed, index: @index, order: numerics.map(&:first), clone: false)
        end

        def vector_cov v1a, v2a
          sum_of_squares(v1a,v2a) / (v1a.size - 1)
        end

        def sum_of_squares v1, v2
          v1a,v2a = v1.reject_values(*Daru::MISSING_VALUES),v2.reject_values(*Daru::MISSING_VALUES)
          v1a.reset_index!
          v2a.reset_index!
          m1 = v1a.mean
          m2 = v2a.mean
          v1a.size.times.inject(0) { |ac,i| ac+(v1a[i]-m1)*(v2a[i]-m2) }
        end

        def compute_stats method
          Daru::Vector.new(
            numeric_vectors.each_with_object({}) do |vec, hash|
              hash[vec] = self[vec].send(method)
            end, name: method
          )
        end
        alias :sds :std
        alias :variance :variance_sample
      end
    end
  end
end