# frozen_string_literal: true

require 'rumale/base/estimator'
require 'rumale/base/cluster_analyzer'
require 'rumale/pairwise_metric'
require 'rumale/utils'
require 'rumale/validation'
require 'rumale/clustering/k_means'

module Rumale
  module Clustering
    # SpectralClustering is a class that implements the normalized spectral clustering.
    #
    # @example
    #   require 'numo/linalg/autoloader'
    #   require 'rumale/clustering/spectral_clustering'
    #
    #   analyzer = Rumale::Clustering::SpectralClustering.new(n_clusters: 10, gamma: 8.0)
    #   cluster_labels = analyzer.fit_predict(samples)
    #
    # *Reference*
    # - Ng, A Y., Jordan, M I., and Weiss, Y., "On Spectral Clustering: Analyssi and an algorithm," Proc. NIPS'01, pp. 849--856, 2001.
    # - von Luxburg, U., "A tutorial on spectral clustering," Statistics and Computing, Vol. 17 (4), pp. 395--416, 2007.
    class SpectralClustering < ::Rumale::Base::Estimator
      include ::Rumale::Base::ClusterAnalyzer

      # Return the data in embedded space.
      # @return [Numo::DFloat] (shape: [n_samples, n_clusters])
      attr_reader :embedding

      # Return the cluster labels.
      # @return [Numo::Int32] (shape: [n_samples])
      attr_reader :labels

      # Create a new cluster analyzer with normalized spectral clustering.
      #
      # @param n_clusters [Integer] The number of clusters.
      # @param affinity [String] The representation of affinity matrix ('rbf' or 'precomputed').
      #   If affinity = 'rbf', the class performs the normalized spectral clustering with the fully connected graph weighted by rbf kernel.
      # @param gamma [Float] The parameter of rbf kernel, if nil it is 1 / n_features.
      #   If affinity = 'precomputed', this parameter is ignored.
      # @param init [String] The initialization method for centroids of K-Means clustering ('random' or 'k-means++').
      # @param max_iter [Integer] The maximum number of iterations for K-Means clustering.
      # @param tol [Float] The tolerance of termination criterion for K-Means clustering.
      # @param random_seed [Integer] The seed value using to initialize the random generator.
      def initialize(n_clusters: 2, affinity: 'rbf', gamma: nil, init: 'k-means++', max_iter: 10, tol: 1.0e-8, random_seed: nil)
        super()
        @params = {
          n_clusters: n_clusters,
          affinity: affinity,
          gamma: gamma,
          init: (init == 'random' ? 'random' : 'k-means++'),
          max_iter: max_iter,
          tol: tol,
          random_seed: (random_seed || srand)
        }
      end

      # Analysis clusters with given training data.
      # To execute this method, Numo::Linalg must be loaded.
      #
      # @overload fit(x) -> SpectralClustering
      #   @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
      #     If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
      #   @return [SpectralClustering] The learned cluster analyzer itself.
      def fit(x, _y = nil)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        raise ArgumentError, 'the input affinity matrix should be square' if check_invalid_array_shape(x)

        raise 'SpectralClustering#fit requires Numo::Linalg but that is not loaded' unless enable_linalg?(warning: false)

        fit_predict(x)
        self
      end

      # Analysis clusters and assign samples to clusters.
      # To execute this method, Numo::Linalg must be loaded.
      #
      # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for cluster analysis.
      #   If the metric is 'precomputed', x must be a square affinity matrix (shape: [n_samples, n_samples]).
      # @return [Numo::Int32] (shape: [n_samples]) Predicted cluster label per sample.
      def fit_predict(x)
        x = ::Rumale::Validation.check_convert_sample_array(x)
        raise ArgumentError, 'the input affinity matrix should be square' if check_invalid_array_shape(x)

        unless enable_linalg?(warning: false)
          raise 'SpectralClustering#fit_predict requires Numo::Linalg but that is not loaded'
        end

        affinity_mat = @params[:metric] == 'precomputed' ? x : ::Rumale::PairwiseMetric.rbf_kernel(x, nil, @params[:gamma])
        @embedding = embedded_space(affinity_mat, @params[:n_clusters])
        normalized_embedding = ::Rumale::Utils.normalize(@embedding, 'l2')
        @labels = kmeans_clustering(normalized_embedding)
      end

      private

      def check_invalid_array_shape(x)
        @params[:affinity] == 'precomputed' && x.shape[0] != x.shape[1]
      end

      def embedded_space(affinity_mat, n_clusters)
        affinity_mat[affinity_mat.diag_indices] = 0.0
        degrees = 1.0 / Numo::NMath.sqrt(affinity_mat.sum(axis: 1))
        laplacian_mat = degrees.diag.dot(affinity_mat).dot(degrees.diag)

        n_samples = affinity_mat.shape[0]
        _, eig_vecs = Numo::Linalg.eigh(laplacian_mat, vals_range: (n_samples - n_clusters)...n_samples)
        eig_vecs.reverse(1).dup
      end

      def kmeans_clustering(x)
        ::Rumale::Clustering::KMeans.new(
          n_clusters: @params[:n_clusters], init: @params[:init],
          max_iter: @params[:max_iter], tol: @params[:tol], random_seed: @params[:random_seed]
        ).fit_predict(x)
      end
    end
  end
end