test/test_factor.rb in statsample-0.17.0 vs test/test_factor.rb in statsample-0.18.0

- old
+ new

@@ -1,9 +1,116 @@ require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb')) +#require 'rserve' +#require 'statsample/rserve_extension' class StatsampleFactorTestCase < MiniTest::Unit::TestCase include Statsample::Fixtures + # Based on Hardle and Simar + def setup + @fixtures_dir=File.expand_path(File.dirname(__FILE__)+"/fixtures") + end + # Based on Hurdle example + def test_covariance_matrix + ds=Statsample::PlainText.read(@fixtures_dir+"/bank2.dat", %w{v1 v2 v3 v4 v5 v6}) + ds.fields.each {|f| + ds[f]=ds[f].centered + } + cm=ds.covariance_matrix + pca =Statsample::Factor::PCA.new( cm, :m=>6) + #puts pca.summary + #puts pca.feature_matrix + exp_eig=[2.985, 0.931,0.242, 0.194, 0.085, 0.035].to_scale + assert_similar_vector(exp_eig, pca.eigenvalues.to_scale, 0.1) + pcs=pca.principal_components(ds) + k=6 + comp_matrix=pca.component_matrix() + k.times {|i| + pc_id="PC_#{i+1}" + k.times {|j| # variable + ds_id="v#{j+1}" + r= Statsample::Bivariate.correlation(ds[ds_id], pcs[pc_id]) + assert_in_delta( r, comp_matrix[j,i]) + } + } + + end + def test_principalcomponents_ruby_gsl + + ran=Distribution::Normal.rng_ugaussian + +# @r=::Rserve::Connection.new + + samples=20 + (3..7).each {|k| + v={} + v["x0"]=samples.times.map { ran.call()}.to_scale.centered + (1...k).each {|i| + v["x#{i}"]=samples.times.map {|ii| ran.call()*0.5+v["x#{i-1}"][ii]*0.5}.to_scale.centered + } + ds=v.to_dataset + cm=ds.covariance_matrix +# @r.assign('ds',ds) +# @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors') +# puts "eigenvalues" +# puts @r.eval('v').to_ruby.to_s + pca_ruby=Statsample::Factor::PCA.new( cm, :m=>k, :use_gsl=>false ) + pca_gsl =Statsample::Factor::PCA.new( cm, :m=>k, :use_gsl=>true ) + pc_ruby = pca_ruby.principal_components(ds) + pc_gsl = pca_gsl.principal_components(ds) + # Test component matrix correlation! + cm_ruby=pca_ruby.component_matrix + #puts cm_ruby.summary + k.times {|i| + pc_id="PC_#{i+1}" + assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i],1e-10) + # Revert gsl component values + pc_gsl_data= (pc_gsl[pc_id][0]-pc_ruby[pc_id][0]).abs>1e-6 ? pc_gsl[pc_id].recode {|v| -v} : pc_gsl[pc_id] + assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6,"PC for #{k} variables") + if false + k.times {|j| # variable + ds_id="x#{j}" + r= Statsample::Bivariate.correlation(ds[ds_id],pc_ruby[pc_id]) + puts "#{pc_id}-#{ds_id}:#{r}" + } + end + } + } + #@r.close + end + def test_principalcomponents() + principalcomponents(true) + principalcomponents(false) + + end + def principalcomponents(gsl) + ran=Distribution::Normal.rng_ugaussian + samples=50 + x1=samples.times.map { ran.call()}.to_scale + x2=samples.times.map {|i| ran.call()*0.5+x1[i]*0.5}.to_scale + ds={'x1'=>x1,'x2'=>x2}.to_dataset + + cm=ds.correlation_matrix + r=cm[0,1] + pca=Statsample::Factor::PCA.new(cm,:m=>2,:use_gsl=>gsl) + assert_in_delta(1+r,pca.eigenvalues[0],1e-10) + assert_in_delta(1-r,pca.eigenvalues[1],1e-10) + hs=1.0 / Math.sqrt(2) + assert_equal_matrix(hs*Matrix[[1],[1]],pca.eigenvectors[0]) + m_1=gsl ? Matrix[[-1],[1]] : Matrix[[1],[-1]] + assert_equal_matrix(hs*m_1, pca.eigenvectors[1]) + + pcs=pca.principal_components(ds) + exp_pc_1=ds.collect_with_index {|row,i| + hs*(row['x1']+row['x2']) + } + exp_pc_2=ds.collect_with_index {|row,i| + gsl ? hs*(row['x2']-row['x1']) : hs*(row['x1']-row['x2']) + + } + assert_similar_vector(exp_pc_1, pcs["PC_1"]) + assert_similar_vector(exp_pc_2, pcs["PC_2"]) + end def test_antiimage cor=Matrix[[1,0.964, 0.312],[0.964,1,0.411],[0.312,0.411,1]] expected=Matrix[[0.062,-0.057, 0.074],[-0.057, 0.057, -0.089], [0.074, -0.089, 0.729]] ai=Statsample::Factor.anti_image_covariance_matrix(cor) assert(Matrix.equal_in_delta?(expected, ai, 0.01), "#{expected.to_s} not equal to #{ai.to_s}") @@ -98,36 +205,32 @@ b.recode! {|c| c-b.mean} ds={'a'=>a,'b'=>b}.to_dataset cov_matrix=Statsample::Bivariate.covariance_matrix(ds) if Statsample.has_gsl? pca=Statsample::Factor::PCA.new(cov_matrix,:use_gsl=>true) - pca_set(pca) + pca_set(pca,"gsl") else skip("Eigenvalues could be calculated with GSL (requires gsl)") end pca=Statsample::Factor::PCA.new(cov_matrix,:use_gsl=>false) - pca_set(pca) + pca_set(pca,"ruby") end - def pca_set(pca) + def pca_set(pca,type) expected_eigenvalues=[1.284, 0.0490] expected_eigenvalues.each_with_index{|ev,i| assert_in_delta(ev,pca.eigenvalues[i],0.001) } expected_communality=[0.590, 0.694] expected_communality.each_with_index{|ev,i| assert_in_delta(ev,pca.communalities[i],0.001) } expected_cm=[0.768, 0.833] - obs=pca.component_matrix(1).column(0).to_a + obs=pca.component_matrix_correlation(1).column(0).to_a expected_cm.each_with_index{|ev,i| assert_in_delta(ev,obs[i],0.001) } - expected_fm_1=::Matrix[[0.677], [0.735]] - expected_fm_2=::Matrix[[0.677,0.735], [0.735, -0.677]] - _test_matrix(expected_fm_1,pca.feature_vector(1)) - _test_matrix(expected_fm_2,pca.feature_vector(2)) assert(pca.summary) end # Tested with R def test_principalaxis @@ -137,11 +240,11 @@ fa=Statsample::Factor::PrincipalAxis.new(matrix,:m=>1, :max_iterations=>50) cm=::Matrix[[0.923],[0.912],[0.507],[0.483]] - _test_matrix(cm,fa.component_matrix) + assert_equal_matrix(cm,fa.component_matrix,0.001) h2=[0.852,0.832,0.257,0.233] h2.each_with_index{|ev,i| assert_in_delta(ev,fa.communalities[i],0.001) } @@ -170,16 +273,11 @@ varimax=Statsample::Factor::Varimax.new(a) assert(!varimax.rotated.nil?, "Rotated shouldn't be empty") assert(!varimax.component_transformation_matrix.nil?, "Component matrix shouldn't be empty") assert(!varimax.h2.nil?, "H2 shouldn't be empty") - _test_matrix(expected,varimax.rotated) + assert_equal_matrix(expected,varimax.rotated,1e-6) assert(varimax.summary.size>0) end - def _test_matrix(a,b) - a.row_size.times {|i| - a.column_size.times {|j| - assert_in_delta(a[i,j], b[i,j],0.001) - } - } - end + + end