test/test_factor.rb in statsample-0.17.0 vs test/test_factor.rb in statsample-0.18.0
- old
+ new
@@ -1,9 +1,116 @@
require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
+#require 'rserve'
+#require 'statsample/rserve_extension'
class StatsampleFactorTestCase < MiniTest::Unit::TestCase
include Statsample::Fixtures
+ # Based on Hardle and Simar
+ def setup
+ @fixtures_dir=File.expand_path(File.dirname(__FILE__)+"/fixtures")
+ end
+ # Based on Hurdle example
+ def test_covariance_matrix
+ ds=Statsample::PlainText.read(@fixtures_dir+"/bank2.dat", %w{v1 v2 v3 v4 v5 v6})
+ ds.fields.each {|f|
+ ds[f]=ds[f].centered
+ }
+ cm=ds.covariance_matrix
+ pca =Statsample::Factor::PCA.new( cm, :m=>6)
+ #puts pca.summary
+ #puts pca.feature_matrix
+ exp_eig=[2.985, 0.931,0.242, 0.194, 0.085, 0.035].to_scale
+ assert_similar_vector(exp_eig, pca.eigenvalues.to_scale, 0.1)
+ pcs=pca.principal_components(ds)
+ k=6
+ comp_matrix=pca.component_matrix()
+ k.times {|i|
+ pc_id="PC_#{i+1}"
+ k.times {|j| # variable
+ ds_id="v#{j+1}"
+ r= Statsample::Bivariate.correlation(ds[ds_id], pcs[pc_id])
+ assert_in_delta( r, comp_matrix[j,i])
+ }
+ }
+
+ end
+ def test_principalcomponents_ruby_gsl
+
+ ran=Distribution::Normal.rng_ugaussian
+
+# @r=::Rserve::Connection.new
+
+ samples=20
+ (3..7).each {|k|
+ v={}
+ v["x0"]=samples.times.map { ran.call()}.to_scale.centered
+ (1...k).each {|i|
+ v["x#{i}"]=samples.times.map {|ii| ran.call()*0.5+v["x#{i-1}"][ii]*0.5}.to_scale.centered
+ }
+ ds=v.to_dataset
+ cm=ds.covariance_matrix
+# @r.assign('ds',ds)
+# @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors')
+# puts "eigenvalues"
+# puts @r.eval('v').to_ruby.to_s
+ pca_ruby=Statsample::Factor::PCA.new( cm, :m=>k, :use_gsl=>false )
+ pca_gsl =Statsample::Factor::PCA.new( cm, :m=>k, :use_gsl=>true )
+ pc_ruby = pca_ruby.principal_components(ds)
+ pc_gsl = pca_gsl.principal_components(ds)
+ # Test component matrix correlation!
+ cm_ruby=pca_ruby.component_matrix
+ #puts cm_ruby.summary
+ k.times {|i|
+ pc_id="PC_#{i+1}"
+ assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i],1e-10)
+ # Revert gsl component values
+ pc_gsl_data= (pc_gsl[pc_id][0]-pc_ruby[pc_id][0]).abs>1e-6 ? pc_gsl[pc_id].recode {|v| -v} : pc_gsl[pc_id]
+ assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6,"PC for #{k} variables")
+ if false
+ k.times {|j| # variable
+ ds_id="x#{j}"
+ r= Statsample::Bivariate.correlation(ds[ds_id],pc_ruby[pc_id])
+ puts "#{pc_id}-#{ds_id}:#{r}"
+ }
+ end
+ }
+ }
+ #@r.close
+ end
+ def test_principalcomponents()
+ principalcomponents(true)
+ principalcomponents(false)
+
+ end
+ def principalcomponents(gsl)
+ ran=Distribution::Normal.rng_ugaussian
+ samples=50
+ x1=samples.times.map { ran.call()}.to_scale
+ x2=samples.times.map {|i| ran.call()*0.5+x1[i]*0.5}.to_scale
+ ds={'x1'=>x1,'x2'=>x2}.to_dataset
+
+ cm=ds.correlation_matrix
+ r=cm[0,1]
+ pca=Statsample::Factor::PCA.new(cm,:m=>2,:use_gsl=>gsl)
+ assert_in_delta(1+r,pca.eigenvalues[0],1e-10)
+ assert_in_delta(1-r,pca.eigenvalues[1],1e-10)
+ hs=1.0 / Math.sqrt(2)
+ assert_equal_matrix(hs*Matrix[[1],[1]],pca.eigenvectors[0])
+ m_1=gsl ? Matrix[[-1],[1]] : Matrix[[1],[-1]]
+ assert_equal_matrix(hs*m_1, pca.eigenvectors[1])
+
+ pcs=pca.principal_components(ds)
+ exp_pc_1=ds.collect_with_index {|row,i|
+ hs*(row['x1']+row['x2'])
+ }
+ exp_pc_2=ds.collect_with_index {|row,i|
+ gsl ? hs*(row['x2']-row['x1']) : hs*(row['x1']-row['x2'])
+
+ }
+ assert_similar_vector(exp_pc_1, pcs["PC_1"])
+ assert_similar_vector(exp_pc_2, pcs["PC_2"])
+ end
def test_antiimage
cor=Matrix[[1,0.964, 0.312],[0.964,1,0.411],[0.312,0.411,1]]
expected=Matrix[[0.062,-0.057, 0.074],[-0.057, 0.057, -0.089], [0.074, -0.089, 0.729]]
ai=Statsample::Factor.anti_image_covariance_matrix(cor)
assert(Matrix.equal_in_delta?(expected, ai, 0.01), "#{expected.to_s} not equal to #{ai.to_s}")
@@ -98,36 +205,32 @@
b.recode! {|c| c-b.mean}
ds={'a'=>a,'b'=>b}.to_dataset
cov_matrix=Statsample::Bivariate.covariance_matrix(ds)
if Statsample.has_gsl?
pca=Statsample::Factor::PCA.new(cov_matrix,:use_gsl=>true)
- pca_set(pca)
+ pca_set(pca,"gsl")
else
skip("Eigenvalues could be calculated with GSL (requires gsl)")
end
pca=Statsample::Factor::PCA.new(cov_matrix,:use_gsl=>false)
- pca_set(pca)
+ pca_set(pca,"ruby")
end
- def pca_set(pca)
+ def pca_set(pca,type)
expected_eigenvalues=[1.284, 0.0490]
expected_eigenvalues.each_with_index{|ev,i|
assert_in_delta(ev,pca.eigenvalues[i],0.001)
}
expected_communality=[0.590, 0.694]
expected_communality.each_with_index{|ev,i|
assert_in_delta(ev,pca.communalities[i],0.001)
}
expected_cm=[0.768, 0.833]
- obs=pca.component_matrix(1).column(0).to_a
+ obs=pca.component_matrix_correlation(1).column(0).to_a
expected_cm.each_with_index{|ev,i|
assert_in_delta(ev,obs[i],0.001)
}
- expected_fm_1=::Matrix[[0.677], [0.735]]
- expected_fm_2=::Matrix[[0.677,0.735], [0.735, -0.677]]
- _test_matrix(expected_fm_1,pca.feature_vector(1))
- _test_matrix(expected_fm_2,pca.feature_vector(2))
assert(pca.summary)
end
# Tested with R
def test_principalaxis
@@ -137,11 +240,11 @@
fa=Statsample::Factor::PrincipalAxis.new(matrix,:m=>1, :max_iterations=>50)
cm=::Matrix[[0.923],[0.912],[0.507],[0.483]]
- _test_matrix(cm,fa.component_matrix)
+ assert_equal_matrix(cm,fa.component_matrix,0.001)
h2=[0.852,0.832,0.257,0.233]
h2.each_with_index{|ev,i|
assert_in_delta(ev,fa.communalities[i],0.001)
}
@@ -170,16 +273,11 @@
varimax=Statsample::Factor::Varimax.new(a)
assert(!varimax.rotated.nil?, "Rotated shouldn't be empty")
assert(!varimax.component_transformation_matrix.nil?, "Component matrix shouldn't be empty")
assert(!varimax.h2.nil?, "H2 shouldn't be empty")
- _test_matrix(expected,varimax.rotated)
+ assert_equal_matrix(expected,varimax.rotated,1e-6)
assert(varimax.summary.size>0)
end
- def _test_matrix(a,b)
- a.row_size.times {|i|
- a.column_size.times {|j|
- assert_in_delta(a[i,j], b[i,j],0.001)
- }
- }
- end
+
+
end