require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb')) #require 'rserve' #require 'statsample/rserve_extension' class StatsampleFactorTestCase < MiniTest::Unit::TestCase include Statsample::Fixtures # Based on Hardle and Simar def setup @fixtures_dir=File.expand_path(File.dirname(__FILE__)+"/fixtures") end # Based on Hurdle example def test_covariance_matrix ds=Statsample::PlainText.read(@fixtures_dir+"/bank2.dat", %w{v1 v2 v3 v4 v5 v6}) ds.fields.each {|f| ds[f]=ds[f].centered } cm=ds.covariance_matrix pca =Statsample::Factor::PCA.new( cm, :m=>6) #puts pca.summary #puts pca.feature_matrix exp_eig=[2.985, 0.931,0.242, 0.194, 0.085, 0.035].to_scale assert_similar_vector(exp_eig, pca.eigenvalues.to_scale, 0.1) pcs=pca.principal_components(ds) k=6 comp_matrix=pca.component_matrix() k.times {|i| pc_id="PC_#{i+1}" k.times {|j| # variable ds_id="v#{j+1}" r= Statsample::Bivariate.correlation(ds[ds_id], pcs[pc_id]) assert_in_delta( r, comp_matrix[j,i]) } } end def test_principalcomponents_ruby_gsl ran=Distribution::Normal.rng_ugaussian # @r=::Rserve::Connection.new samples=20 (3..7).each {|k| v={} v["x0"]=samples.times.map { ran.call()}.to_scale.centered (1...k).each {|i| v["x#{i}"]=samples.times.map {|ii| ran.call()*0.5+v["x#{i-1}"][ii]*0.5}.to_scale.centered } ds=v.to_dataset cm=ds.covariance_matrix # @r.assign('ds',ds) # @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors') # puts "eigenvalues" # puts @r.eval('v').to_ruby.to_s pca_ruby=Statsample::Factor::PCA.new( cm, :m=>k, :use_gsl=>false ) pca_gsl =Statsample::Factor::PCA.new( cm, :m=>k, :use_gsl=>true ) pc_ruby = pca_ruby.principal_components(ds) pc_gsl = pca_gsl.principal_components(ds) # Test component matrix correlation! cm_ruby=pca_ruby.component_matrix #puts cm_ruby.summary k.times {|i| pc_id="PC_#{i+1}" assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i],1e-10) # Revert gsl component values pc_gsl_data= (pc_gsl[pc_id][0]-pc_ruby[pc_id][0]).abs>1e-6 ? pc_gsl[pc_id].recode {|v| -v} : pc_gsl[pc_id] assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6,"PC for #{k} variables") if false k.times {|j| # variable ds_id="x#{j}" r= Statsample::Bivariate.correlation(ds[ds_id],pc_ruby[pc_id]) puts "#{pc_id}-#{ds_id}:#{r}" } end } } #@r.close end def test_principalcomponents() principalcomponents(true) principalcomponents(false) end def principalcomponents(gsl) ran=Distribution::Normal.rng_ugaussian samples=50 x1=samples.times.map { ran.call()}.to_scale x2=samples.times.map {|i| ran.call()*0.5+x1[i]*0.5}.to_scale ds={'x1'=>x1,'x2'=>x2}.to_dataset cm=ds.correlation_matrix r=cm[0,1] pca=Statsample::Factor::PCA.new(cm,:m=>2,:use_gsl=>gsl) assert_in_delta(1+r,pca.eigenvalues[0],1e-10) assert_in_delta(1-r,pca.eigenvalues[1],1e-10) hs=1.0 / Math.sqrt(2) assert_equal_matrix(hs*Matrix[[1],[1]],pca.eigenvectors[0]) m_1=gsl ? Matrix[[-1],[1]] : Matrix[[1],[-1]] assert_equal_matrix(hs*m_1, pca.eigenvectors[1]) pcs=pca.principal_components(ds) exp_pc_1=ds.collect_with_index {|row,i| hs*(row['x1']+row['x2']) } exp_pc_2=ds.collect_with_index {|row,i| gsl ? hs*(row['x2']-row['x1']) : hs*(row['x1']-row['x2']) } assert_similar_vector(exp_pc_1, pcs["PC_1"]) assert_similar_vector(exp_pc_2, pcs["PC_2"]) end def test_antiimage cor=Matrix[[1,0.964, 0.312],[0.964,1,0.411],[0.312,0.411,1]] expected=Matrix[[0.062,-0.057, 0.074],[-0.057, 0.057, -0.089], [0.074, -0.089, 0.729]] ai=Statsample::Factor.anti_image_covariance_matrix(cor) assert(Matrix.equal_in_delta?(expected, ai, 0.01), "#{expected.to_s} not equal to #{ai.to_s}") end def test_kmo @v1=[1 ,2 ,3 ,4 ,7 ,8 ,9 ,10,14,15,20,50,60,70].to_scale @v2=[5 ,6 ,11,12,13,16,17,18,19,20,30,0,0,0].to_scale @v3=[10,3 ,20,30,40,50,80,10,20,30,40,2,3,4].to_scale # KMO: 0.490 ds={'v1'=>@v1,'v2'=>@v2,'v3'=>@v3}.to_dataset cor=Statsample::Bivariate.correlation_matrix(ds) kmo=Statsample::Factor.kmo(cor) assert_in_delta(0.667, kmo,0.001) assert_in_delta(0.81, Statsample::Factor.kmo(harman_817),0.01) end def test_kmo_univariate m=harman_817 expected=[0.73,0.76,0.84,0.87,0.53,0.93,0.78,0.86] m.row_size.times.map {|i| assert_in_delta(expected[i], Statsample::Factor.kmo_univariate(m,i),0.01) } end def test_parallelanalysis_with_data if Statsample.has_gsl? samples=100 variables=10 iterations=50 rng = Distribution::Normal.rng_ugaussian f1=samples.times.collect {rng.call}.to_scale f2=samples.times.collect {rng.call}.to_scale vectors={} variables.times do |i| if i<5 vectors["v#{i}"]=samples.times.collect {|nv| f1[nv]*5+f2[nv]*2+rng.call }.to_scale else vectors["v#{i}"]=samples.times.collect {|nv| f2[nv]*5+f1[nv]*2+rng.call }.to_scale end end ds=vectors.to_dataset pa1=Statsample::Factor::ParallelAnalysis.new(ds, :bootstrap_method=>:data, :iterations=>iterations) pa2=Statsample::Factor::ParallelAnalysis.with_random_data(samples,variables,:iterations=>iterations,:percentil=>95) 3.times do |n| var="ev_0000#{n+1}" assert_in_delta(pa1.ds_eigenvalues[var].mean,pa2.ds_eigenvalues[var].mean,0.04) end else skip("Too slow without GSL") end end def test_parallelanalysis pa=Statsample::Factor::ParallelAnalysis.with_random_data(305,8,:iterations=>100,:percentil=>95) assert_in_delta(1.2454, pa.ds_eigenvalues['ev_00001'].mean, 0.01) assert_in_delta(1.1542, pa.ds_eigenvalues['ev_00002'].mean, 0.01) assert_in_delta(1.0836, pa.ds_eigenvalues['ev_00003'].mean, 0.01) #puts pa.summary assert(pa.summary.size>0) #pa=Statsample::Factor::ParallelAnalysis.with_random_data(305,8,100, 95, true) #puts pa.summary end def test_map #fields=%w{height arm.span forearm lower.leg weight bitro.diameter chest.girth chest.width} m=Matrix[ [ 1, 0.846, 0.805, 0.859, 0.473, 0.398, 0.301, 0.382], [ 0.846, 1, 0.881, 0.826, 0.376, 0.326, 0.277, 0.415], [ 0.805, 0.881, 1, 0.801, 0.38, 0.319, 0.237, 0.345], [ 0.859, 0.826, 0.801, 1, 0.436, 0.329, 0.327, 0.365], [ 0.473, 0.376, 0.38, 0.436, 1, 0.762, 0.73, 0.629], [ 0.398, 0.326, 0.319, 0.329, 0.762, 1, 0.583, 0.577], [ 0.301, 0.277, 0.237, 0.327, 0.73, 0.583, 1, 0.539], [ 0.382, 0.415, 0.345, 0.365, 0.629, 0.577, 0.539, 1] ] map=Statsample::Factor::MAP.new(m) assert_in_delta(map.minfm, 0.066445,0.00001) assert_equal(map.number_of_factors, 2) assert_in_delta(map.fm[0], 0.312475,0.00001) assert_in_delta(map.fm[1], 0.245121,0.00001) end # Tested with SPSS and R def test_pca a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale b=[2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9].to_scale a.recode! {|c| c-a.mean} b.recode! {|c| c-b.mean} ds={'a'=>a,'b'=>b}.to_dataset cov_matrix=Statsample::Bivariate.covariance_matrix(ds) if Statsample.has_gsl? pca=Statsample::Factor::PCA.new(cov_matrix,:use_gsl=>true) pca_set(pca,"gsl") else skip("Eigenvalues could be calculated with GSL (requires gsl)") end pca=Statsample::Factor::PCA.new(cov_matrix,:use_gsl=>false) pca_set(pca,"ruby") end def pca_set(pca,type) expected_eigenvalues=[1.284, 0.0490] expected_eigenvalues.each_with_index{|ev,i| assert_in_delta(ev,pca.eigenvalues[i],0.001) } expected_communality=[0.590, 0.694] expected_communality.each_with_index{|ev,i| assert_in_delta(ev,pca.communalities[i],0.001) } expected_cm=[0.768, 0.833] obs=pca.component_matrix_correlation(1).column(0).to_a expected_cm.each_with_index{|ev,i| assert_in_delta(ev,obs[i],0.001) } assert(pca.summary) end # Tested with R def test_principalaxis matrix=::Matrix[ [1.0, 0.709501601093587, 0.877596585880047, 0.272219316266807], [0.709501601093587, 1.0, 0.291633797330304, 0.871141831433844], [0.877596585880047, 0.291633797330304, 1.0, -0.213373722977167], [0.272219316266807, 0.871141831433844, -0.213373722977167, 1.0]] fa=Statsample::Factor::PrincipalAxis.new(matrix,:m=>1, :max_iterations=>50) cm=::Matrix[[0.923],[0.912],[0.507],[0.483]] assert_equal_matrix(cm,fa.component_matrix,0.001) h2=[0.852,0.832,0.257,0.233] h2.each_with_index{|ev,i| assert_in_delta(ev,fa.communalities[i],0.001) } eigen1=2.175 assert_in_delta(eigen1, fa.eigenvalues[0],0.001) assert(fa.summary.size>0) fa=Statsample::Factor::PrincipalAxis.new(matrix,:smc=>false) assert_raise RuntimeError do fa.iterate end end def test_rotation_varimax a = Matrix[ [ 0.4320, 0.8129, 0.3872] , [0.7950, -0.5416, 0.2565] , [0.5944, 0.7234, -0.3441], [0.8945, -0.3921, -0.1863] ] expected= Matrix[[-0.0204423, 0.938674, -0.340334], [0.983662, 0.0730206, 0.134997], [0.0826106, 0.435975, -0.893379], [0.939901, -0.0965213, -0.309596]] varimax=Statsample::Factor::Varimax.new(a) assert(!varimax.rotated.nil?, "Rotated shouldn't be empty") assert(!varimax.component_transformation_matrix.nil?, "Component matrix shouldn't be empty") assert(!varimax.h2.nil?, "H2 shouldn't be empty") assert_equal_matrix(expected,varimax.rotated,1e-6) assert(varimax.summary.size>0) end end