# frozen_string_literal: true require 'svmkit/validation' require 'svmkit/base/base_estimator' require 'svmkit/base/transformer' module SVMKit # Module for matrix decomposition algorithms. module Decomposition # PCA is a class that implements Principal Component Analysis. # # @example # decomposer = SVMKit::Decomposition::PCA.new(n_components: 2) # representaion = decomposer.fit_transform(samples) # # *Reference* # - A. Sharma and K K. Paliwal, "Fast principal component analysis using fixed-point algorithm," Pattern Recognition Letters, 28, pp. 1151--1155, 2007. class PCA include Base::BaseEstimator include Base::Transformer include Validation # Returns the principal components. # @return [Numo::DFloat] (shape: [n_components, n_features]) attr_reader :components # Returns the mean vector. # @return [Numo::DFloat] (shape: [n_features] attr_reader :mean # Return the random generator. # @return [Random] attr_reader :rng # Create a new transformer with PCA. # # @param n_components [Integer] The number of principal components. # @param max_iter [Integer] The maximum number of iterations. # @param tol [Float] The tolerance of termination criterion. # @param random_seed [Integer] The seed value using to initialize the random generator. def initialize(n_components: 2, max_iter: 100, tol: 1.0e-4, random_seed: nil) check_params_integer(n_components: n_components, max_iter: max_iter) check_params_float(tol: tol) check_params_type_or_nil(Integer, random_seed: random_seed) check_params_positive(n_components: n_components, max_iter: max_iter, tol: tol) @params = {} @params[:n_components] = n_components @params[:max_iter] = max_iter @params[:tol] = tol @params[:random_seed] = random_seed @params[:random_seed] ||= srand @components = nil @mean = nil @rng = Random.new(@params[:random_seed]) end # Fit the model with given training data. # # @overload fit(x) -> PCA # # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model. # @return [PCA] The learned transformer itself. def fit(x, _y = nil) check_sample_array(x) # initialize some variables. @components = nil n_samples, n_features = x.shape # centering. @mean = x.mean(0) centered_x = x - @mean # optimization. covariance_mat = centered_x.transpose.dot(centered_x) / (n_samples - 1) @params[:n_components].times do comp_vec = random_vec(n_features) @params[:max_iter].times do updated = orthogonalize(covariance_mat.dot(comp_vec)) break if (updated.dot(comp_vec) - 1).abs < @params[:tol] comp_vec = updated end @components = @components.nil? ? comp_vec : Numo::NArray.vstack([@components, comp_vec]) end self end # Fit the model with training data, and then transform them with the learned model. # # @overload fit_transform(x) -> Numo::DFloat # # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model. # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data def fit_transform(x, _y = nil) check_sample_array(x) fit(x).transform(x) end # Transform the given data with the learned model. # # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The data to be transformed with the learned model. # @return [Numo::DFloat] (shape: [n_samples, n_components]) The transformed data. def transform(x) check_sample_array(x) (x - @mean).dot(@components.transpose) end # Inverse transform the given transformed data with the learned model. # # @param z [Numo::DFloat] (shape: [n_samples, n_components]) The data to be restored into original space with the learned model. # @return [Numo::DFloat] (shape: [n_samples, n_featuress]) The restored data. def inverse_transform(z) check_sample_array(z) c = @components.shape[1].nil? ? @components.expand_dims(0) : @components z.dot(c) + @mean end # Dump marshal data. # @return [Hash] The marshal data. def marshal_dump { params: @params, components: @components, mean: @mean, rng: @rng } end # Load marshal data. # @return [nil] def marshal_load(obj) @params = obj[:params] @components = obj[:components] @mean = obj[:mean] @rng = obj[:rng] nil end private def orthogonalize(pcvec) unless @components.nil? delta = @components.dot(pcvec) * @components.transpose delta = delta.sum(1) unless delta.shape[1].nil? pcvec -= delta end pcvec / Math.sqrt((pcvec**2).sum.abs) + 1.0e-12 end def random_vec(n_features) Numo::DFloat[*(Array.new(n_features) { @rng.rand })] end end end end