test/test_factor.rb in statsample-1.4.3 vs test/test_factor.rb in statsample-1.5.0

- old
+ new

@@ -16,12 +16,12 @@ } cm = ds.covariance_matrix pca = Statsample::Factor::PCA.new(cm, m: 6) # puts pca.summary # puts pca.feature_matrix - exp_eig = [2.985, 0.931, 0.242, 0.194, 0.085, 0.035].to_scale - assert_similar_vector(exp_eig, pca.eigenvalues.to_scale, 0.1) + exp_eig = [2.985, 0.931, 0.242, 0.194, 0.085, 0.035].to_numeric + assert_similar_vector(exp_eig, pca.eigenvalues.to_numeric, 0.1) pcs = pca.principal_components(ds) k = 6 comp_matrix = pca.component_matrix k.times {|i| pc_id = "PC_#{i + 1}" @@ -32,63 +32,65 @@ } } end def test_principalcomponents_ruby_gsl - ran = Distribution::Normal.rng + if Statsample.has_gsl? + ran = Distribution::Normal.rng - # @r=::Rserve::Connection.new + # @r=::Rserve::Connection.new - samples = 20 - [3, 5, 7].each {|k| - v = {} - v['x0'] = samples.times.map { ran.call }.to_scale.centered - (1...k).each {|i| - v["x#{i}"] = samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}"][ii] * 0.5 }.to_scale.centered - } + samples = 20 + [3, 5, 7].each {|k| + v = {} + v['x0'] = samples.times.map { ran.call }.to_numeric.centered + (1...k).each {|i| + v["x#{i}"] = samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}"][ii] * 0.5 }.to_numeric.centered + } - ds = v.to_dataset - cm = ds.covariance_matrix - # @r.assign('ds',ds) - # @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors') - # puts "eigenvalues" - # puts @r.eval('v').to_ruby.to_s - pca_ruby = Statsample::Factor::PCA.new(cm, m: k, use_gsl: false) - pca_gsl = Statsample::Factor::PCA.new(cm, m: k, use_gsl: true) - pc_ruby = pca_ruby.principal_components(ds) - pc_gsl = pca_gsl.principal_components(ds) - # Test component matrix correlation! - cm_ruby = pca_ruby.component_matrix - # puts cm_ruby.summary - k.times {|i| - pc_id = "PC_#{i + 1}" - assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i], 1e-10) - # Revert gsl component values - pc_gsl_data = (pc_gsl[pc_id][0] - pc_ruby[pc_id][0]).abs > 1e-6 ? pc_gsl[pc_id].recode(&:-@) : pc_gsl[pc_id] - assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6, "PC for #{k} variables") - if false - k.times {|j| # variable - ds_id = "x#{j}" - r = Statsample::Bivariate.correlation(ds[ds_id], pc_ruby[pc_id]) - puts "#{pc_id}-#{ds_id}:#{r}" - } - end + ds = v.to_dataset + cm = ds.covariance_matrix + # @r.assign('ds',ds) + # @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors') + # puts "eigenvalues" + # puts @r.eval('v').to_ruby.to_s + pca_ruby = Statsample::Factor::PCA.new(cm, m: k, use_gsl: false) + pca_gsl = Statsample::Factor::PCA.new(cm, m: k, use_gsl: true) + pc_ruby = pca_ruby.principal_components(ds) + pc_gsl = pca_gsl.principal_components(ds) + # Test component matrix correlation! + cm_ruby = pca_ruby.component_matrix + # puts cm_ruby.summary + k.times {|i| + pc_id = "PC_#{i + 1}" + assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i], 1e-10) + # Revert gsl component values + pc_gsl_data = (pc_gsl[pc_id][0] - pc_ruby[pc_id][0]).abs > 1e-6 ? pc_gsl[pc_id].recode(&:-@) : pc_gsl[pc_id] + assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6, "PC for #{k} variables") + if false + k.times {|j| # variable + ds_id = "x#{j}" + r = Statsample::Bivariate.correlation(ds[ds_id], pc_ruby[pc_id]) + puts "#{pc_id}-#{ds_id}:#{r}" + } + end + } } - } + end # @r.close end def test_principalcomponents - principalcomponents(true) + principalcomponents(true) if Statsample.has_gsl? principalcomponents(false) end def principalcomponents(gsl) ran = Distribution::Normal.rng samples = 50 - x1 = samples.times.map { ran.call }.to_scale - x2 = samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 }.to_scale + x1 = samples.times.map { ran.call }.to_numeric + x2 = samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 }.to_numeric ds = { 'x1' => x1, 'x2' => x2 }.to_dataset cm = ds.correlation_matrix r = cm[0, 1] pca = Statsample::Factor::PCA.new(cm, m: 2, use_gsl: gsl) @@ -117,13 +119,13 @@ ai = Statsample::Factor.anti_image_covariance_matrix(cor) assert(Matrix.equal_in_delta?(expected, ai, 0.01), "#{expected} not equal to #{ai}") end def test_kmo - @v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_scale - @v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_scale - @v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_scale + @v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_numeric + @v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_numeric + @v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_numeric # KMO: 0.490 ds = { 'v1' => @v1, 'v2' => @v2, 'v3' => @v3 }.to_dataset cor = Statsample::Bivariate.correlation_matrix(ds) kmo = Statsample::Factor.kmo(cor) assert_in_delta(0.667, kmo, 0.001) @@ -137,15 +139,17 @@ assert_in_delta(expected[i], Statsample::Factor.kmo_univariate(m, i), 0.01) } end # Tested with SPSS and R def test_pca - a = [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale - b = [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9].to_scale + + a = [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_numeric + b = [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9].to_numeric a.recode! { |c| c - a.mean } b.recode! { |c| c - b.mean } ds = { 'a' => a, 'b' => b }.to_dataset + cov_matrix = Statsample::Bivariate.covariance_matrix(ds) if Statsample.has_gsl? pca = Statsample::Factor::PCA.new(cov_matrix, use_gsl: true) pca_set(pca, 'gsl') else @@ -154,9 +158,11 @@ pca = Statsample::Factor::PCA.new(cov_matrix, use_gsl: false) pca_set(pca, 'ruby') end def pca_set(pca, _type) + + expected_eigenvalues = [1.284, 0.0490] expected_eigenvalues.each_with_index{|ev, i| assert_in_delta(ev, pca.eigenvalues[i], 0.001) } expected_communality = [0.590, 0.694]