test_factor.rb in statsample-1.5.0

- old
+ new

@@ -16,12 +16,12 @@
     }
     cm = ds.covariance_matrix
     pca = Statsample::Factor::PCA.new(cm, m: 6)
     # puts pca.summary
     # puts pca.feature_matrix
-    exp_eig = [2.985, 0.931, 0.242, 0.194, 0.085, 0.035].to_scale
-    assert_similar_vector(exp_eig, pca.eigenvalues.to_scale, 0.1)
+    exp_eig = [2.985, 0.931, 0.242, 0.194, 0.085, 0.035].to_numeric
+    assert_similar_vector(exp_eig, pca.eigenvalues.to_numeric, 0.1)
     pcs = pca.principal_components(ds)
     k = 6
     comp_matrix = pca.component_matrix
     k.times {|i|
       pc_id = "PC_#{i + 1}"
@@ -32,63 +32,65 @@
       }
     }
   end
 
   def test_principalcomponents_ruby_gsl
-    ran = Distribution::Normal.rng
+    if Statsample.has_gsl?
+      ran = Distribution::Normal.rng
 
-    #    @r=::Rserve::Connection.new
+      #    @r=::Rserve::Connection.new
 
-    samples = 20
-    [3, 5, 7].each {|k|
-      v = {}
-      v['x0'] = samples.times.map { ran.call }.to_scale.centered
-      (1...k).each {|i|
-        v["x#{i}"] = samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}"][ii] * 0.5 }.to_scale.centered
-      }
+      samples = 20
+      [3, 5, 7].each {|k|
+        v = {}
+        v['x0'] = samples.times.map { ran.call }.to_numeric.centered
+        (1...k).each {|i|
+          v["x#{i}"] = samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}"][ii] * 0.5 }.to_numeric.centered
+        }
 
-      ds = v.to_dataset
-      cm = ds.covariance_matrix
-      #      @r.assign('ds',ds)
-      #      @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors')
-      #      puts "eigenvalues"
-      #      puts @r.eval('v').to_ruby.to_s
-      pca_ruby = Statsample::Factor::PCA.new(cm, m: k, use_gsl: false)
-      pca_gsl = Statsample::Factor::PCA.new(cm, m: k, use_gsl: true)
-      pc_ruby = pca_ruby.principal_components(ds)
-      pc_gsl  = pca_gsl.principal_components(ds)
-      # Test component matrix correlation!
-      cm_ruby = pca_ruby.component_matrix
-      # puts cm_ruby.summary
-      k.times {|i|
-        pc_id = "PC_#{i + 1}"
-        assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i], 1e-10)
-        # Revert gsl component values
-        pc_gsl_data = (pc_gsl[pc_id][0] - pc_ruby[pc_id][0]).abs > 1e-6 ? pc_gsl[pc_id].recode(&:-@) : pc_gsl[pc_id]
-        assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6, "PC for #{k} variables")
-        if false
-          k.times {|j| # variable
-            ds_id = "x#{j}"
-            r = Statsample::Bivariate.correlation(ds[ds_id], pc_ruby[pc_id])
-            puts "#{pc_id}-#{ds_id}:#{r}"
-          }
-        end
+        ds = v.to_dataset
+        cm = ds.covariance_matrix
+        #      @r.assign('ds',ds)
+        #      @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors')
+        #      puts "eigenvalues"
+        #      puts @r.eval('v').to_ruby.to_s
+        pca_ruby = Statsample::Factor::PCA.new(cm, m: k, use_gsl: false)
+        pca_gsl = Statsample::Factor::PCA.new(cm, m: k, use_gsl: true)
+        pc_ruby = pca_ruby.principal_components(ds)
+        pc_gsl  = pca_gsl.principal_components(ds)
+        # Test component matrix correlation!
+        cm_ruby = pca_ruby.component_matrix
+        # puts cm_ruby.summary
+        k.times {|i|
+          pc_id = "PC_#{i + 1}"
+          assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i], 1e-10)
+          # Revert gsl component values
+          pc_gsl_data = (pc_gsl[pc_id][0] - pc_ruby[pc_id][0]).abs > 1e-6 ? pc_gsl[pc_id].recode(&:-@) : pc_gsl[pc_id]
+          assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6, "PC for #{k} variables")
+          if false
+            k.times {|j| # variable
+              ds_id = "x#{j}"
+              r = Statsample::Bivariate.correlation(ds[ds_id], pc_ruby[pc_id])
+              puts "#{pc_id}-#{ds_id}:#{r}"
+            }
+          end
+        }
       }
-    }
+    end
     # @r.close
   end
 
   def test_principalcomponents
-    principalcomponents(true)
+    principalcomponents(true) if Statsample.has_gsl?
     principalcomponents(false)
   end
 
   def principalcomponents(gsl)
     ran = Distribution::Normal.rng
     samples = 50
-    x1 = samples.times.map { ran.call }.to_scale
-    x2 = samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 }.to_scale
+    x1 = samples.times.map { ran.call }.to_numeric
+    x2 = samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 }.to_numeric
     ds = { 'x1' => x1, 'x2' => x2 }.to_dataset
 
     cm = ds.correlation_matrix
     r = cm[0, 1]
     pca = Statsample::Factor::PCA.new(cm, m: 2, use_gsl: gsl)
@@ -117,13 +119,13 @@
     ai = Statsample::Factor.anti_image_covariance_matrix(cor)
     assert(Matrix.equal_in_delta?(expected, ai, 0.01), "#{expected} not equal to #{ai}")
   end
 
   def test_kmo
-    @v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_scale
-    @v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_scale
-    @v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_scale
+    @v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_numeric
+    @v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_numeric
+    @v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_numeric
     # KMO: 0.490
     ds = { 'v1' => @v1, 'v2' => @v2, 'v3' => @v3 }.to_dataset
     cor = Statsample::Bivariate.correlation_matrix(ds)
     kmo = Statsample::Factor.kmo(cor)
     assert_in_delta(0.667, kmo, 0.001)
@@ -137,15 +139,17 @@
       assert_in_delta(expected[i], Statsample::Factor.kmo_univariate(m, i), 0.01)
     }
   end
   # Tested with SPSS and R
   def test_pca
-    a = [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
-    b = [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9].to_scale
+
+    a = [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_numeric
+    b = [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9].to_numeric
     a.recode! { |c| c - a.mean }
     b.recode! { |c| c - b.mean }
     ds = { 'a' => a, 'b' => b }.to_dataset
+
     cov_matrix = Statsample::Bivariate.covariance_matrix(ds)
     if Statsample.has_gsl?
       pca = Statsample::Factor::PCA.new(cov_matrix, use_gsl: true)
       pca_set(pca, 'gsl')
     else
@@ -154,9 +158,11 @@
     pca = Statsample::Factor::PCA.new(cov_matrix, use_gsl: false)
     pca_set(pca, 'ruby')
   end
 
   def pca_set(pca, _type)
+
+
     expected_eigenvalues = [1.284, 0.0490]
     expected_eigenvalues.each_with_index{|ev, i|
       assert_in_delta(ev, pca.eigenvalues[i], 0.001)
     }
     expected_communality = [0.590, 0.694]