module Statsample module Factor # Performs Horn's 'parallel analysis' to a principal components analysis # to adjust for sample bias in the retention of components. # Can create the bootstrap samples using parameters (mean and standard # deviation of each variable) or sampling for actual data. # == Description # "PA involves the construction of a number of correlation matrices of random variables based on the same sample size and number of variables in the real data set. The average eigenvalues from the random correlation matrices are then compared to the eigenvalues from the real data correlation matrix, such that the first observed eigenvalue is compared to the first random eigenvalue, the second observed eigenvalue is compared to the second random eigenvalue, and so on." (Hayton, Allen & Scarpello, 2004, p.194) # == Usage # # ds should be any valid dataset # pa=Statsample::Factor::ParallelAnalysis.new(ds, :iterations=>100, :bootstrap_method=>:raw_data) # == References: # * Hayton, J., Allen, D. & Scarpello, V.(2004). Factor Retention Decisions in Exploratory Factor Analysis: a Tutorial on Parallel Analysis. Organizational Research Methods, 7 (2), 191-205. # * https://people.ok.ubc.ca/brioconn/nfactors/nfactors.html (for inspiration) class ParallelAnalysis include DirtyMemoize # Number of random sets to produce. 50 by default attr_accessor :iterations # Name of analysis attr_accessor :name # Dataset. You could use mock vectors when use bootstrap method attr_reader :ds # Bootstrap method. :raw_data used by default # * :parameter: uses mean and standard deviation of each variable # * :raw_data : sample with replacement from actual data. # attr_accessor :bootstrap_method # Factor method. # Could be Statsample::Factor::PCA or Statsample::Factor::PrincipalAxis. # PCA used by default. attr_accessor :factor_class # Percentil over bootstrap eigenvalue should be accepted. 95 by default attr_accessor :percentil # Correlation matrix used with :raw_data . :correlation_matrix used by default attr_accessor :matrix_method # Dataset with bootstrapped eigenvalues attr_reader :ds_eigenvalues # Show extra information if true attr_accessor :debug def initialize(ds, opts=Hash.new) @ds=ds @fields=@ds.fields @n_variables=@fields.size @n_cases=ds.cases opts_default={ :name=>"Parallel Analysis", :iterations=>50, :bootstrap_method => :raw_data, :factor_class => Statsample::Factor::PCA, :percentil=>95, :debug=>false, :matrix_method=>:correlation_matrix } @opts=opts_default.merge(opts) @opts[:matrix_method]==:correlation_matrix if @opts[:bootstrap_method]==:parameters opts_default.keys.each {|k| send("#{k}=", @opts[k]) } end # Summary of results def summary ReportBuilder.new(:no_title=>true).add(self).to_text end # Number of factor to retent def number_of_factors total=0 ds_eigenvalues.fields.each_with_index do |f,i| total+=1 if (@original[i]>0 and @original[i]>ds_eigenvalues[f].percentil(percentil)) end total end def report_building(g) #:nodoc: g.section(:name=>@name) do |s| s.text "Bootstrap Method: #{bootstrap_method}" s.text "Correlation Matrix type : #{matrix_method}" s.text "Number of variables: #{@n_variables}" s.text "Number of cases: #{@n_cases}" s.text "Number of iterations: #{@iterations}" s.text "Number or factors to preserve: #{number_of_factors}" s.table(:name=>"Eigenvalues", :header=>["Eigenvalue", "actual", "mean","p.#{percentil}","preserve?"]) do |t| ds_eigenvalues.fields.each_with_index do |f,i| v=ds_eigenvalues[f] t.row [i+1, "%0.4f" % @original[i], "%0.4f" % v.mean, "%0.4f" % v.percentil(percentil), (v.percentil(percentil)>0 and @original[i] > v.percentil(percentil)) ? "Yes":""] end end end end # Perform calculation. Shouldn't be called directly for the user def compute @original=factor_class.new(Statsample::Bivariate.correlation_matrix(@ds), :m=>@n_variables).eigenvalues.sort.reverse @ds_eigenvalues=Statsample::Dataset.new((1..@n_variables).map{|v| "ev_%05d" % v}) @ds_eigenvalues.fields.each {|f| @ds_eigenvalues[f].type=:scale} @iterations.times do |i| # Create a dataset of dummy values ds_bootstrap=Statsample::Dataset.new(@ds.fields) if bootstrap_method==:parameter rng = GSL::Rng.alloc() end @fields.each do |f| if bootstrap_method==:parameter ds_bootstrap[f]=@n_cases.times.map {|c| rng.gaussian( @ds[f].sd)+@ds[f].mean}.to_scale elsif bootstrap_method==:raw_data ds_bootstrap[f]=ds[f].sample_with_replacement(@n_cases).to_scale end end fa=factor_class.new(Statsample::Bivariate.send(matrix_method, ds_bootstrap), :m=>@n_variables) ev=fa.eigenvalues.sort.reverse @ds_eigenvalues.add_case_array(ev) puts "iteration #{i}" if $DEBUG or debug end @ds_eigenvalues.update_valid_data end dirty_memoize :number_of_factors, :ds_eigenvalues dirty_writer :iterations, :bootstrap_method, :factor_class, :percentil end end end