module Daru
  module Maths
    # Encapsulates statistics methods for vectors. Most basic stuff like mean, etc.
    # is done inside the wrapper, so that native methods can be used for most of
    # the computationally intensive tasks.
    module Statistics
      module Vector # rubocop:disable Metrics/ModuleLength
        extend Gem::Deprecate

        def mean
          @data.mean
        end

        def sum
          @data.sum
        end

        def product
          @data.product
        end

        def min
          @data.min
        end

        def range
          max - min
        end

        def median
          @data.respond_to?(:median) ? @data.median : percentile(50)
        end

        def mode
          mode = frequencies.to_h.select { |_,v| v == frequencies.max }.keys
          mode.size > 1 ? Daru::Vector.new(mode) : mode.first
        end

        # Create a summary of count, mean, standard deviation, min and max of
        # the vector in one shot.
        #
        # == Arguments
        #
        # +methods+ - An array with aggregation methods specified as symbols to
        # be applied to vectors. Default is [:count, :mean, :std, :max,
        # :min]. Methods will be applied in the specified order.
        def describe methods=nil
          methods ||= [:count, :mean, :std, :min, :max]
          description = methods.map { |m| send(m) }
          Daru::Vector.new(description, index: methods, name: :statistics)
        end

        def median_absolute_deviation
          m = median
          recode { |val| (val - m).abs }.median
        end

        alias :mad :median_absolute_deviation

        def standard_error
          standard_deviation_sample/Math.sqrt(size - count_values(*Daru::MISSING_VALUES))
        end

        def sum_of_squared_deviation
          (@data.inject(0) { |a,x| x**2 + a } - (sum**2).quo(size - count_values(*Daru::MISSING_VALUES)).to_f).to_f
        end

        # Retrieve unique values of non-nil data
        def factors
          reject_values(*Daru::MISSING_VALUES).uniq.reset_index!
        end

        # Maximum element of the vector.
        #
        # @param return_type [Symbol] Data type of the returned value. Defaults
        #   to returning only the maximum number but passing *:vector* will return
        #   a Daru::Vector with the index of the corresponding maximum value.
        def max return_type=:stored_type
          max_value = @data.max
          if return_type == :vector
            Daru::Vector.new({index_of(max_value) => max_value}, name: @name, dtype: @dtype)
          else
            max_value
          end
        end

        # Return a Vector with the max element and its index.
        # @return [Daru::Vector]
        def max_index
          max :vector
        end

        def frequencies
          Daru::Vector.new(
            @data.each_with_object(Hash.new(0)) do |element, hash|
              hash[element] += 1 unless element.nil?
            end
          )
        end

        alias_method :freqs, :frequencies
        deprecate :freqs, :frequencies, 2016, 10

        def proportions
          len = size - count_values(*Daru::MISSING_VALUES)
          frequencies.to_h.each_with_object({}) do |(el, count), hash|
            hash[el] = count / len
          end
        end

        def ranked
          sum = 0
          r = frequencies.to_h.sort.each_with_object({}) do |(el, count), memo|
            memo[el] = ((sum + 1) + (sum + count)).quo(2)
            sum += count
          end

          recode { |e| r[e] }
        end

        def coefficient_of_variation
          standard_deviation_sample / mean
        end

        # Retrieves number of cases which comply condition. If block given,
        # retrieves number of instances where block returns true. If other
        # values given, retrieves the frequency for this value. If no value
        # given, counts the number of non-nil elements in the Vector.
        def count value=false, &block
          if block_given?
            @data.select(&block).count
          elsif value
            count { |val| val == value }
          else
            size - indexes(*Daru::MISSING_VALUES).size
          end
        end

        # Count number of occurrences of each value in the Vector
        def value_counts
          values = @data.each_with_object(Hash.new(0)) do |d, memo|
            memo[d] += 1
          end

          Daru::Vector.new(values)
        end

        def proportion value=1
          frequencies[value].quo(size - count_values(*Daru::MISSING_VALUES)).to_f
        end

        # Sample variance with denominator (N-1)
        def variance_sample m=nil
          m ||= mean
          if @data.respond_to? :variance_sample
            @data.variance_sample m
          else
            sum_of_squares(m).quo(size - count_values(*Daru::MISSING_VALUES) - 1)
          end
        end

        # Population variance with denominator (N)
        def variance_population m=nil
          m ||= mean
          if @data.respond_to? :variance_population
            @data.variance_population m
          else
            sum_of_squares(m).quo(size - count_values(*Daru::MISSING_VALUES)).to_f
          end
        end

        # Sample covariance with denominator (N-1)
        def covariance_sample other
          size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
          covariance_sum(other) / (size - count_values(*Daru::MISSING_VALUES) - 1)
        end

        # Population covariance with denominator (N)
        def covariance_population other
          size == other.size or raise ArgumentError, 'size of both the vectors must be equal'
          covariance_sum(other) / (size - count_values(*Daru::MISSING_VALUES))
        end

        def sum_of_squares(m=nil)
          m ||= mean
          reject_values(*Daru::MISSING_VALUES).data.inject(0) { |memo, val|
            memo + (val - m)**2
          }
        end

        def standard_deviation_population m=nil
          m ||= mean
          if @data.respond_to? :standard_deviation_population
            @data.standard_deviation_population(m)
          else
            Math.sqrt(variance_population(m))
          end
        end

        def standard_deviation_sample m=nil
          m ||= mean
          if @data.respond_to? :standard_deviation_sample
            @data.standard_deviation_sample m
          else
            Math.sqrt(variance_sample(m))
          end
        end

        # Calculate skewness using (sigma(xi - mean)^3)/((N)*std_dev_sample^3)
        def skew m=nil
          if @data.respond_to? :skew
            @data.skew
          else
            m ||= mean
            th  = @data.inject(0) { |memo, val| memo + ((val - m)**3) }
            th.quo((size - indexes(*Daru::MISSING_VALUES).size) * (standard_deviation_sample(m)**3))
          end
        end

        def kurtosis m=nil
          if @data.respond_to? :kurtosis
            @data.kurtosis
          else
            m ||= mean
            fo  = @data.inject(0) { |a, x| a + ((x - m) ** 4) }
            fo.quo((size - indexes(*Daru::MISSING_VALUES).size) * standard_deviation_sample(m) ** 4) - 3
          end
        end

        def average_deviation_population m=nil
          must_be_numeric!
          m ||= mean
          reject_values(*Daru::MISSING_VALUES).data.inject(0) { |memo, val|
            (val - m).abs + memo
          }.quo(size - count_values(*Daru::MISSING_VALUES))
        end

        # Returns the value of the percentile q
        #
        # Accepts an optional second argument specifying the strategy to interpolate
        # when the requested percentile lies between two data points a and b
        # Valid strategies are:
        # * :midpoint (Default): (a + b) / 2
        # * :linear : a + (b - a) * d where d is the decimal part of the index between a and b.
        # == References
        #
        # This is the NIST recommended method (http://en.wikipedia.org/wiki/Percentile#NIST_method)
        def percentile(q, strategy=:midpoint)
          case strategy
          when :midpoint
            midpoint_percentile(q)
          when :linear
            linear_percentile(q)
          else
            raise ArgumentError, "Unknown strategy #{strategy}"
          end
        end

        # Dichotomize the vector with 0 and 1, based on lowest value.
        # If parameter is defined, this value and lower will be 0
        # and higher, 1.
        def dichotomize(low=nil)
          low ||= factors.min

          recode do |x|
            if x.nil?
              nil
            elsif x > low
              1
            else
              0
            end
          end
        end

        # Center data by subtracting the mean from each non-nil value.
        def center
          self - mean
        end

        # Standardize data.
        #
        # == Arguments
        #
        # * use_population - Pass as *true* if you want to use population
        # standard deviation instead of sample standard deviation.
        def standardize use_population=false
          m ||= mean
          sd = use_population ? sdp : sds
          return Daru::Vector.new([nil]*size) if m.nil? || sd == 0.0

          vector_standardized_compute m, sd
        end

        # :nocov:
        def box_cox_transformation lambda # :nodoc:
          must_be_numeric!

          recode do |x|
            if !x.nil?
              if lambda.zero?
                Math.log(x)
              else
                (x ** lambda - 1).quo(lambda)
              end
            else
              nil
            end
          end
        end
        # :nocov:

        # Replace each non-nil value in the vector with its percentile.
        def vector_percentile
          c = size - indexes(*Daru::MISSING_VALUES).size
          ranked.recode! { |i| i.nil? ? nil : (i.quo(c)*100).to_f }
        end

        def vector_standardized_compute(m,sd)
          if @data.respond_to? :vector_standardized_compute
            @data.vector_standardized_compute(m,sd)
          else
            Daru::Vector.new @data.collect { |x| x.nil? ? nil : (x.to_f - m).quo(sd) },
              index: index, name: name, dtype: dtype
          end
        end

        def vector_centered_compute(m)
          if @data.respond_to? :vector_centered_compute
            @data.vector_centered_compute(m)
          else
            Daru::Vector.new @data.collect { |x| x.nil? ? nil : x.to_f-m },
              index: index, name: name, dtype: dtype
          end
        end

        # Returns an random sample of size n, with replacement,
        # only with non-nil data.
        #
        # In all the trails, every item have the same probability
        # of been selected.
        def sample_with_replacement(sample=1)
          if @data.respond_to? :sample_with_replacement
            @data.sample_with_replacement sample
          else
            valid = indexes(*Daru::MISSING_VALUES).empty? ? self : reject_values(*Daru::MISSING_VALUES)
            vds = valid.size
            (0...sample).collect { valid[rand(vds)] }
          end
        end

        # Returns an random sample of size n, without replacement,
        # only with valid data.
        #
        # Every element could only be selected once.
        #
        # A sample of the same size of the vector is the vector itself.
        def sample_without_replacement(sample=1)
          if @data.respond_to? :sample_without_replacement
            @data.sample_without_replacement sample
          else
            raw_sample_without_replacement(sample)
          end
        end

        # The percent_change method computes the percent change over
        # the given number of periods.
        #
        # @param [Integer] periods (1) number of nils to insert at the beginning.
        #
        # @example
        #
        #   vector = Daru::Vector.new([4,6,6,8,10],index: ['a','f','t','i','k'])
        #   vector.percent_change
        #   #=>
        #   #   <Daru::Vector:28713060 @name = nil size: 5 >
        #   #              nil
        #   #   a
        #   #   f	   0.5
        #   #   t	   0.0
        #   #   i	   0.3333333333333333
        #   #   k          0.25
        def percent_change periods=1
          must_be_numeric!

          prev = nil
          arr = @data.each_with_index.map do |cur, i|
            if i < periods ||
               include_with_nan?(Daru::MISSING_VALUES, cur) ||
               include_with_nan?(Daru::MISSING_VALUES, prev)
              nil
            else
              (cur - prev) / prev.to_f
            end.tap { prev = cur if cur }
          end

          Daru::Vector.new(arr, index: @index, name: @name)
        end

        # Performs the difference of the series.
        # Note: The first difference of series is X(t) - X(t-1)
        # But, second difference of series is NOT X(t) - X(t-2)
        # It is the first difference of the first difference
        # => (X(t) - X(t-1)) - (X(t-1) - X(t-2))
        #
        # == Arguments
        #
        # * *max_lags*: integer, (default: 1), number of differences reqd.
        #
        # @example Using #diff
        #
        #   ts = Daru::Vector.new((1..10).map { rand })
        #            # => [0.69, 0.23, 0.44, 0.71, ...]
        #
        #   ts.diff   # => [nil, -0.46, 0.21, 0.27, ...]
        #
        # @return [Daru::Vector]
        def diff(max_lags=1)
          ts = self
          difference = []
          max_lags.times do
            difference = ts - ts.lag
            ts = difference
          end
          difference
        end

        # Calculate the rolling function for a loopback value.
        #
        # @param [Symbol] function The rolling function to be applied. Can be
        #   any function applicatble to Daru::Vector (:mean, :median, :count,
        #   :min, :max, etc.)
        # @param [Integer] n (10) A non-negative value which serves as the loopback length.
        # @return [Daru::Vector] Vector containin rolling calculations.
        # @example Using #rolling
        #   ts = Daru::Vector.new((1..100).map { rand })
        #            # => [0.69, 0.23, 0.44, 0.71, ...]
        #   # first 9 observations are nil
        #   ts.rolling(:mean)    # => [ ... nil, 0.484... , 0.445... , 0.513 ... , ... ]
        def rolling function, n=10
          Daru::Vector.new(
            [nil] * (n - 1) +
            (0..(size - n)).map do |i|
              Daru::Vector.new(@data[i...(i + n)]).send(function)
            end, index: @index
          )
        end

        # @!method rolling_mean
        #   Calculate rolling average
        #   @param [Integer] n (10) Loopback length
        # @!method rolling_median
        #   Calculate rolling median
        #   @param [Integer] n (10) Loopback length
        # @!method rolling_count
        #   Calculate rolling non-missing count
        #   @param [Integer] n (10) Loopback length
        # @!method rolling_max
        #   Calculate rolling max value
        #   @param [Integer] n (10) Loopback length
        # @!method rolling_min
        #   Calculate rolling min value
        #   @param [Integer] n (10) Loopback length
        # @!method rolling_sum
        #   Calculate rolling sum
        #   @param [Integer] n (10) Loopback length
        # @!method rolling_std
        #   Calculate rolling standard deviation
        #   @param [Integer] n (10) Loopback length
        # @!method rolling_variance
        #   Calculate rolling variance
        #   @param [Integer] n (10) Loopback length
        [:count, :mean, :median, :max, :min, :sum, :std, :variance].each do |meth|
          define_method("rolling_#{meth}".to_sym) do |n=10|
            rolling(meth, n)
          end
        end

        # Exponential Moving Average.
        # Calculates an exponential moving average of the series using a
        # specified parameter. If wilder is false (the default) then the EMA
        # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
        # Welles Wilder smoother of 1 / n.
        #
        # Warning for EMA usage: EMAs are unstable for small series, as they
        # use a lot more than n observations to calculate. The series is stable
        # if the size of the series is >= 3.45 * (n + 1)
        #
        # @param [Integer] n (10) Loopback length.
        # @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
        #   used for smoothing; if false, uses 2/(n+1) value
        #
        # @example Using ema
        #
        #   ts = Daru::Vector.new((1..100).map { rand })
        #            # => [0.577..., 0.123..., 0.173..., 0.233..., ...]
        #
        #   # first 9 observations are nil
        #   ts.ema   # => [ ... nil, 0.455... , 0.395..., 0.323..., ... ]
        #
        # @return [Daru::Vector] Contains EMA
        def ema(n=10, wilder=false) # rubocop:disable Metrics/AbcSize
          smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
          # need to start everything from the first non-nil observation
          start = @data.index { |i| !i.nil? }
          # first n - 1 observations are nil
          base = [nil] * (start + n - 1)
          # nth observation is just a moving average
          base << @data[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n
          (start + n).upto size - 1 do |i|
            base << self[i] * smoother + (1 - smoother) * base.last
          end

          Daru::Vector.new(base, index: @index, name: @name)
        end

        # Exponential Moving Variance.
        # Calculates an exponential moving variance of the series using a
        # specified parameter. If wilder is false (the default) then the EMV
        # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
        # Welles Wilder smoother of 1 / n.
        #
        # @param [Integer] n (10) Loopback length.
        # @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
        #   used for smoothing; if false, uses 2/(n+1) value
        #
        # @example Using emv
        #
        #   ts = Daru::Vector.new((1..100).map { rand })
        #            # => [0.047..., 0.23..., 0.836..., 0.845..., ...]
        #
        #   # first 9 observations are nil
        #   ts.emv   # => [ ... nil, 0.073... , 0.082..., 0.080..., ...]
        #
        # @return [Daru::Vector] contains EMV
        def emv(n=10, wilder=false) # rubocop:disable Metrics/AbcSize
          smoother = wilder ? 1.0 / n : 2.0 / (n + 1)
          # need to start everything from the first non-nil observation
          start = @data.index { |i| !i.nil? }
          # first n - 1 observations are nil
          var_base = [nil] * (start + n - 1)
          mean_base = [nil] * (start + n - 1)
          mean_base << @data[start...(start + n)].inject(0.0) { |s, a| a.nil? ? s : s + a } / n
          # nth observation is just a moving variance_population
          var_base << @data[start...(start + n)].inject(0.0) { |s,x| x.nil? ? s : s + (x - mean_base.last)**2 } / n
          (start + n).upto size - 1 do |i|
            last = mean_base.last
            mean_base << self[i] * smoother + (1 - smoother) * last
            var_base << (1 - smoother) * var_base.last + smoother * (self[i] - last) * (self[i] - mean_base.last)
          end
          Daru::Vector.new(var_base, index: @index, name: @name)
        end

        # Exponential Moving Standard Deviation.
        # Calculates an exponential moving standard deviation of the series using a
        # specified parameter. If wilder is false (the default) then the EMSD
        # uses a smoothing value of 2 / (n + 1), if it is true then it uses the
        # Welles Wilder smoother of 1 / n.
        #
        # @param [Integer] n (10) Loopback length.
        # @param [TrueClass, FalseClass] wilder (false) If true, 1/n value is
        #   used for smoothing; if false, uses 2/(n+1) value
        #
        # @example Using emsd
        #
        #   ts = Daru::Vector.new((1..100).map { rand })
        #            # => [0.400..., 0.727..., 0.862..., 0.013..., ...]
        #
        #   # first 9 observations are nil
        #   ts.emsd   # => [ ... nil, 0.285... , 0.258..., 0.243..., ...]
        #
        # @return [Daru::Vector] contains EMSD
        def emsd(n=10, wilder=false)
          result = []
          emv_return = emv(n, wilder)
          emv_return.each do |d|
            result << (d.nil? ? nil : Math.sqrt(d))
          end
          Daru::Vector.new(result, index: @index, name: @name)
        end

        # Moving Average Convergence-Divergence.
        # Calculates the MACD (moving average convergence-divergence) of the time
        # series - this is a comparison of a fast EMA with a slow EMA.
        #
        # == Arguments
        # * *fast*: integer, (default = 12) - fast component of MACD
        # * *slow*: integer, (default = 26) - slow component of MACD
        # * *signal*: integer, (default = 9) - signal component of MACD
        #
        # == Usage
        #
        #   ts = Daru::Vector.new((1..100).map { rand })
        #            # => [0.69, 0.23, 0.44, 0.71, ...]
        #   ts.macd(13)
        #
        # == Returns
        #
        # Array of two Daru::Vectors - comparison of fast EMA with slow and EMA with
        # signal value
        def macd(fast=12, slow=26, signal=9)
          series = ema(fast) - ema(slow)
          [series, series.ema(signal)]
        end

        # Calculates the autocorrelation coefficients of the series.
        #
        # The first element is always 1, since that is the correlation
        # of the series with itself.
        #
        # @example
        #   ts = Daru::Vector.new((1..100).map { rand })
        #
        #   ts.acf   # => array with first 21 autocorrelations
        #   ts.acf 3 # => array with first 3 autocorrelations
        def acf(max_lags=nil)
          max_lags ||= (10 * Math.log10(size)).to_i

          (0..max_lags).map do |i|
            if i.zero?
              1.0
            else
              m = mean
              # can't use Pearson coefficient since the mean for the lagged series should
              # be the same as the regular series
              ((self - m) * (lag(i) - m)).sum / variance_sample / (size - 1)
            end
          end
        end

        # Provides autocovariance.
        #
        # == Options
        #
        # * *:demean* = true; optional. Supply false if series is not to be demeaned
        # * *:unbiased* = true; optional. true/false for unbiased/biased form of autocovariance
        #
        # == Returns
        #
        # Autocovariance value
        def acvf(demean=true, unbiased=true) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
          opts = {
            demean: true,
            unbaised: true
          }.merge(opts)

          demean   = opts[:demean]
          unbiased = opts[:unbiased]
          demeaned_series = demean ? self - mean : self

          n = (10 * Math.log10(size)).to_i + 1
          m = mean
          d = if unbiased
                Array.new(size, size)
              else
                (1..size).to_a.reverse[0..n]
              end

          0.upto(n - 1).map do |i|
            (demeaned_series * (lag(i) - m)).sum / d[i]
          end
        end

        # Calculate cumulative sum of Vector
        def cumsum
          result = []
          acc = 0
          @data.each do |d|
            if include_with_nan? Daru::MISSING_VALUES, d
              result << nil
            else
              acc += d
              result << acc
            end
          end

          Daru::Vector.new(result, index: @index)
        end

        alias :sdp :standard_deviation_population
        alias :sds :standard_deviation_sample
        alias :std :sds
        alias :adp :average_deviation_population
        alias :cov :coefficient_of_variation
        alias :variance :variance_sample
        alias :covariance :covariance_sample
        alias :sd :standard_deviation_sample
        alias :ss :sum_of_squares
        alias :percentil :percentile
        alias :se :standard_error

        private

        def must_be_numeric!
          numeric? or raise TypeError, 'Vector must be numeric'
        end

        def covariance_sum other
          self_mean = mean
          other_mean = other.mean
          @data
            .zip(other.data).inject(0) do |res, (d, o)|
              res + if !d || !o
                      0
                    else
                      (d - self_mean) * (o - other_mean)
                    end
            end
        end

        def midpoint_percentile(q) # rubocop:disable Metrics/AbcSize
          sorted = reject_values(*Daru::MISSING_VALUES).to_a.sort

          v = ((size - count_values(*Daru::MISSING_VALUES)) * q).quo(100)
          if v.to_i!=v
            sorted[v.to_i]
          else
            (sorted[(v-0.5).to_i].to_f + sorted[(v+0.5).to_i]).quo(2)
          end
        end

        def linear_percentile(q) # rubocop:disable Metrics/AbcSize
          sorted = reject_values(*Daru::MISSING_VALUES).to_a.sort
          index = (q / 100.0) * ((size - count_values(*Daru::MISSING_VALUES)) + 1)

          k = index.truncate
          d = index % 1

          if k.zero?
            sorted[0]
          elsif k >= sorted.size
            sorted[-1]
          else
            sorted[k - 1] + d * (sorted[k] - sorted[k - 1])
          end
        end

        def raw_sample_without_replacement sample
          valid = indexes(*Daru::MISSING_VALUES).empty? ? self : reject_values(*Daru::MISSING_VALUES)
          raise ArgumentError, "Sample size couldn't be greater than n" if
            sample > valid.size
          out  = []
          size = valid.size
          while out.size < sample
            value = rand(size)
            out.push(value) unless out.include?(value)
          end

          out.collect { |i| valid[i] }
        end
      end
    end
  end
end