test_factor.rb in statsample-1.4.2

- old
+ new
@@ -1,222 +1,219 @@
-require(File.expand_path(File.dirname(__FILE__)+'/helpers_tests.rb'))
-#require 'rserve'
-#require 'statsample/rserve_extension'
+require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))
+# require 'rserve'
+# require 'statsample/rserve_extension'
 
-class StatsampleFactorTestCase < MiniTest::Unit::TestCase
+class StatsampleFactorTestCase < Minitest::Test
   include Statsample::Fixtures
   # Based on Hardle and Simar
   def setup
-    @fixtures_dir=File.expand_path(File.dirname(__FILE__)+"/fixtures")
+    @fixtures_dir = File.expand_path(File.dirname(__FILE__) + '/fixtures')
   end
   # Based on Hurdle example
   def test_covariance_matrix
-    ds=Statsample::PlainText.read(@fixtures_dir+"/bank2.dat", %w{v1 v2 v3 v4 v5 v6})
+    ds = Statsample::PlainText.read(@fixtures_dir + '/bank2.dat', %w(v1 v2 v3 v4 v5 v6))
     ds.fields.each {|f|
-      ds[f]=ds[f].centered
+      ds[f] = ds[f].centered
     }
-    cm=ds.covariance_matrix
-    pca =Statsample::Factor::PCA.new( cm, :m=>6)
-    #puts pca.summary
-    #puts pca.feature_matrix
-    exp_eig=[2.985, 0.931,0.242, 0.194, 0.085, 0.035].to_scale
+    cm = ds.covariance_matrix
+    pca = Statsample::Factor::PCA.new(cm, m: 6)
+    # puts pca.summary
+    # puts pca.feature_matrix
+    exp_eig = [2.985, 0.931, 0.242, 0.194, 0.085, 0.035].to_scale
     assert_similar_vector(exp_eig, pca.eigenvalues.to_scale, 0.1)
-    pcs=pca.principal_components(ds)
-    k=6
-    comp_matrix=pca.component_matrix()
+    pcs = pca.principal_components(ds)
+    k = 6
+    comp_matrix = pca.component_matrix
     k.times {|i|
-      pc_id="PC_#{i+1}"
+      pc_id = "PC_#{i + 1}"
       k.times {|j| # variable
-          ds_id="v#{j+1}"
-          r= Statsample::Bivariate.correlation(ds[ds_id], pcs[pc_id])
-          assert_in_delta( r, comp_matrix[j,i]) 
-        }
+        ds_id = "v#{j + 1}"
+        r = Statsample::Bivariate.correlation(ds[ds_id], pcs[pc_id])
+        assert_in_delta(r, comp_matrix[j, i])
+      }
     }
-    
   end
+
   def test_principalcomponents_ruby_gsl
-    
-    ran=Distribution::Normal.rng
-    
-#    @r=::Rserve::Connection.new
+    ran = Distribution::Normal.rng
 
-    samples=20
-    [3,5,7].each {|k|
-      v={}
-      v["x0"]=samples.times.map { ran.call()}.to_scale.centered
+    #    @r=::Rserve::Connection.new
+
+    samples = 20
+    [3, 5, 7].each {|k|
+      v = {}
+      v['x0'] = samples.times.map { ran.call }.to_scale.centered
       (1...k).each {|i|
-        v["x#{i}"]=samples.times.map {|ii| ran.call()*0.5+v["x#{i-1}"][ii]*0.5}.to_scale.centered
+        v["x#{i}"] = samples.times.map { |ii| ran.call * 0.5 + v["x#{i - 1}"][ii] * 0.5 }.to_scale.centered
       }
-      
-      ds=v.to_dataset
-      cm=ds.covariance_matrix
-#      @r.assign('ds',ds)
-#      @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors')
-#      puts "eigenvalues"
-#      puts @r.eval('v').to_ruby.to_s
-      pca_ruby=Statsample::Factor::PCA.new( cm, :m=>k, :use_gsl=>false )
-      pca_gsl =Statsample::Factor::PCA.new( cm, :m=>k, :use_gsl=>true  )
+
+      ds = v.to_dataset
+      cm = ds.covariance_matrix
+      #      @r.assign('ds',ds)
+      #      @r.eval('cm<-cor(ds);sm<-eigen(cm, sym=TRUE);v<-sm$vectors')
+      #      puts "eigenvalues"
+      #      puts @r.eval('v').to_ruby.to_s
+      pca_ruby = Statsample::Factor::PCA.new(cm, m: k, use_gsl: false)
+      pca_gsl = Statsample::Factor::PCA.new(cm, m: k, use_gsl: true)
       pc_ruby = pca_ruby.principal_components(ds)
       pc_gsl  = pca_gsl.principal_components(ds)
       # Test component matrix correlation!
-      cm_ruby=pca_ruby.component_matrix
-      #puts cm_ruby.summary
+      cm_ruby = pca_ruby.component_matrix
+      # puts cm_ruby.summary
       k.times {|i|
-        pc_id="PC_#{i+1}"
-        assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i],1e-10)
+        pc_id = "PC_#{i + 1}"
+        assert_in_delta(pca_ruby.eigenvalues[i], pca_gsl.eigenvalues[i], 1e-10)
         # Revert gsl component values
-        pc_gsl_data= (pc_gsl[pc_id][0]-pc_ruby[pc_id][0]).abs>1e-6 ? pc_gsl[pc_id].recode {|v| -v} : pc_gsl[pc_id] 
-        assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6,"PC for #{k} variables")
+        pc_gsl_data = (pc_gsl[pc_id][0] - pc_ruby[pc_id][0]).abs > 1e-6 ? pc_gsl[pc_id].recode(&:-@) : pc_gsl[pc_id]
+        assert_similar_vector(pc_gsl_data, pc_ruby[pc_id], 1e-6, "PC for #{k} variables")
         if false
-        k.times {|j| # variable
-          ds_id="x#{j}"
-          r= Statsample::Bivariate.correlation(ds[ds_id],pc_ruby[pc_id])
-          puts "#{pc_id}-#{ds_id}:#{r}"
-        }
+          k.times {|j| # variable
+            ds_id = "x#{j}"
+            r = Statsample::Bivariate.correlation(ds[ds_id], pc_ruby[pc_id])
+            puts "#{pc_id}-#{ds_id}:#{r}"
+          }
         end
       }
     }
-    #@r.close
+    # @r.close
   end
-  def test_principalcomponents()
-  principalcomponents(true)
-  principalcomponents(false)
-  
-  end  
+
+  def test_principalcomponents
+    principalcomponents(true)
+    principalcomponents(false)
+  end
+
   def principalcomponents(gsl)
-    ran=Distribution::Normal.rng
-    samples=50
-    x1=samples.times.map { ran.call()}.to_scale
-    x2=samples.times.map {|i| ran.call()*0.5+x1[i]*0.5}.to_scale
-    ds={'x1'=>x1,'x2'=>x2}.to_dataset
-    
-    cm=ds.correlation_matrix
-    r=cm[0,1]
-    pca=Statsample::Factor::PCA.new(cm,:m=>2,:use_gsl=>gsl)
-    assert_in_delta(1+r,pca.eigenvalues[0],1e-10)
-    assert_in_delta(1-r,pca.eigenvalues[1],1e-10)
-    hs=1.0 / Math.sqrt(2)
-    assert_equal_vector(Vector[1, 1]*hs, pca.eigenvectors[0])
-    m_1=gsl ? Vector[-1,1] : Vector[1,-1]
-    
-    assert_equal_vector(hs*m_1, pca.eigenvectors[1])    
-    
-    pcs=pca.principal_components(ds)
-    exp_pc_1=ds.collect_with_index {|row,i|
-      hs*(row['x1']+row['x2'])
-    }
-    exp_pc_2=ds.collect_with_index {|row,i|
-      gsl ? hs*(row['x2']-row['x1']) : hs*(row['x1']-row['x2'])
+    ran = Distribution::Normal.rng
+    samples = 50
+    x1 = samples.times.map { ran.call }.to_scale
+    x2 = samples.times.map { |i| ran.call * 0.5 + x1[i] * 0.5 }.to_scale
+    ds = { 'x1' => x1, 'x2' => x2 }.to_dataset
 
+    cm = ds.correlation_matrix
+    r = cm[0, 1]
+    pca = Statsample::Factor::PCA.new(cm, m: 2, use_gsl: gsl)
+    assert_in_delta(1 + r, pca.eigenvalues[0], 1e-10)
+    assert_in_delta(1 - r, pca.eigenvalues[1], 1e-10)
+    hs = 1.0 / Math.sqrt(2)
+    assert_equal_vector(Vector[1, 1] * hs, pca.eigenvectors[0])
+    m_1 = gsl ? Vector[-1, 1] : Vector[1, -1]
+
+    assert_equal_vector(hs * m_1, pca.eigenvectors[1])
+
+    pcs = pca.principal_components(ds)
+    exp_pc_1 = ds.collect_with_index {|row, _i|
+      hs * (row['x1'] + row['x2'])
     }
-    assert_similar_vector(exp_pc_1, pcs["PC_1"])
-    assert_similar_vector(exp_pc_2, pcs["PC_2"])
+    exp_pc_2 = ds.collect_with_index {|row, _i|
+      gsl ? hs * (row['x2'] - row['x1']) : hs * (row['x1'] - row['x2'])
+    }
+    assert_similar_vector(exp_pc_1, pcs['PC_1'])
+    assert_similar_vector(exp_pc_2, pcs['PC_2'])
   end
+
   def test_antiimage
-    cor=Matrix[[1,0.964, 0.312],[0.964,1,0.411],[0.312,0.411,1]]
-    expected=Matrix[[0.062,-0.057, 0.074],[-0.057, 0.057, -0.089], [0.074, -0.089, 0.729]]
-    ai=Statsample::Factor.anti_image_covariance_matrix(cor)
-    assert(Matrix.equal_in_delta?(expected, ai, 0.01), "#{expected.to_s} not equal to #{ai.to_s}")
+    cor = Matrix[[1, 0.964, 0.312], [0.964, 1, 0.411], [0.312, 0.411, 1]]
+    expected = Matrix[[0.062, -0.057, 0.074], [-0.057, 0.057, -0.089], [0.074, -0.089, 0.729]]
+    ai = Statsample::Factor.anti_image_covariance_matrix(cor)
+    assert(Matrix.equal_in_delta?(expected, ai, 0.01), "#{expected} not equal to #{ai}")
   end
+
   def test_kmo
-      @v1=[1 ,2 ,3 ,4 ,7 ,8 ,9 ,10,14,15,20,50,60,70].to_scale
-      @v2=[5 ,6 ,11,12,13,16,17,18,19,20,30,0,0,0].to_scale
-      @v3=[10,3 ,20,30,40,50,80,10,20,30,40,2,3,4].to_scale
-      # KMO: 0.490
-      ds={'v1'=>@v1,'v2'=>@v2,'v3'=>@v3}.to_dataset
-      cor=Statsample::Bivariate.correlation_matrix(ds)
-     kmo=Statsample::Factor.kmo(cor)
-     assert_in_delta(0.667, kmo,0.001)
-     assert_in_delta(0.81, Statsample::Factor.kmo(harman_817),0.01)
-     
+    @v1 = [1, 2, 3, 4, 7, 8, 9, 10, 14, 15, 20, 50, 60, 70].to_scale
+    @v2 = [5, 6, 11, 12, 13, 16, 17, 18, 19, 20, 30, 0, 0, 0].to_scale
+    @v3 = [10, 3, 20, 30, 40, 50, 80, 10, 20, 30, 40, 2, 3, 4].to_scale
+    # KMO: 0.490
+    ds = { 'v1' => @v1, 'v2' => @v2, 'v3' => @v3 }.to_dataset
+    cor = Statsample::Bivariate.correlation_matrix(ds)
+    kmo = Statsample::Factor.kmo(cor)
+    assert_in_delta(0.667, kmo, 0.001)
+    assert_in_delta(0.81, Statsample::Factor.kmo(harman_817), 0.01)
   end
+
   def test_kmo_univariate
-    m=harman_817
-    expected=[0.73,0.76,0.84,0.87,0.53,0.93,0.78,0.86]
+    m = harman_817
+    expected = [0.73, 0.76, 0.84, 0.87, 0.53, 0.93, 0.78, 0.86]
     m.row_size.times.map {|i|
-      assert_in_delta(expected[i], Statsample::Factor.kmo_univariate(m,i),0.01)
+      assert_in_delta(expected[i], Statsample::Factor.kmo_univariate(m, i), 0.01)
     }
   end
   # Tested with SPSS and R
   def test_pca
-      a=[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
-      b=[2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9].to_scale
-      a.recode! {|c| c-a.mean}
-      b.recode! {|c| c-b.mean}
-      ds={'a'=>a,'b'=>b}.to_dataset
-      cov_matrix=Statsample::Bivariate.covariance_matrix(ds)
-      if Statsample.has_gsl?
-        pca=Statsample::Factor::PCA.new(cov_matrix,:use_gsl=>true)
-        pca_set(pca,"gsl")
-      else
-        skip("Eigenvalues could be calculated with GSL (requires gsl)")
-      end
-      pca=Statsample::Factor::PCA.new(cov_matrix,:use_gsl=>false)
-      pca_set(pca,"ruby")
+    a = [2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1].to_scale
+    b = [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9].to_scale
+    a.recode! { |c| c - a.mean }
+    b.recode! { |c| c - b.mean }
+    ds = { 'a' => a, 'b' => b }.to_dataset
+    cov_matrix = Statsample::Bivariate.covariance_matrix(ds)
+    if Statsample.has_gsl?
+      pca = Statsample::Factor::PCA.new(cov_matrix, use_gsl: true)
+      pca_set(pca, 'gsl')
+    else
+      skip('Eigenvalues could be calculated with GSL (requires gsl)')
+    end
+    pca = Statsample::Factor::PCA.new(cov_matrix, use_gsl: false)
+    pca_set(pca, 'ruby')
   end
-  def pca_set(pca,type)
-      expected_eigenvalues=[1.284, 0.0490]
-      expected_eigenvalues.each_with_index{|ev,i|
-        assert_in_delta(ev,pca.eigenvalues[i],0.001)
-      }
-      expected_communality=[0.590, 0.694]
-      expected_communality.each_with_index{|ev,i|
-        assert_in_delta(ev,pca.communalities[i],0.001)
-      }
-      expected_cm=[0.768, 0.833]
-      obs=pca.component_matrix_correlation(1).column(0).to_a
-      expected_cm.each_with_index{|ev,i|
-        assert_in_delta(ev,obs[i],0.001)
-      }
 
-      assert(pca.summary)
+  def pca_set(pca, _type)
+    expected_eigenvalues = [1.284, 0.0490]
+    expected_eigenvalues.each_with_index{|ev, i|
+      assert_in_delta(ev, pca.eigenvalues[i], 0.001)
+    }
+    expected_communality = [0.590, 0.694]
+    expected_communality.each_with_index{|ev, i|
+      assert_in_delta(ev, pca.communalities[i], 0.001)
+    }
+    expected_cm = [0.768, 0.833]
+    obs = pca.component_matrix_correlation(1).column(0).to_a
+    expected_cm.each_with_index{|ev, i|
+      assert_in_delta(ev, obs[i], 0.001)
+    }
+
+    assert(pca.summary)
   end
 
   # Tested with R
   def test_principalaxis
-      matrix=::Matrix[
-      [1.0, 0.709501601093587, 0.877596585880047, 0.272219316266807],  [0.709501601093587, 1.0, 0.291633797330304, 0.871141831433844], [0.877596585880047, 0.291633797330304, 1.0, -0.213373722977167], [0.272219316266807, 0.871141831433844, -0.213373722977167, 1.0]]
-      
-      
-      fa=Statsample::Factor::PrincipalAxis.new(matrix,:m=>1, :max_iterations=>50)
+    matrix = ::Matrix[
+    [1.0, 0.709501601093587, 0.877596585880047, 0.272219316266807],  [0.709501601093587, 1.0, 0.291633797330304, 0.871141831433844], [0.877596585880047, 0.291633797330304, 1.0, -0.213373722977167], [0.272219316266807, 0.871141831433844, -0.213373722977167, 1.0]]
 
-      cm=::Matrix[[0.923],[0.912],[0.507],[0.483]]
-      
-      assert_equal_matrix(cm,fa.component_matrix,0.001)
-      
-      h2=[0.852,0.832,0.257,0.233]
-      h2.each_with_index{|ev,i|
-        assert_in_delta(ev,fa.communalities[i],0.001)
-      }
-      eigen1=2.175
-      assert_in_delta(eigen1, fa.eigenvalues[0],0.001)
-      assert(fa.summary.size>0)
-      fa=Statsample::Factor::PrincipalAxis.new(matrix,:smc=>false)
-            
-      assert_raise RuntimeError do
-        fa.iterate
-      end
+    fa = Statsample::Factor::PrincipalAxis.new(matrix, m: 1, max_iterations: 50)
 
-  end
+    cm = ::Matrix[[0.923], [0.912], [0.507], [0.483]]
 
+    assert_equal_matrix(cm, fa.component_matrix, 0.001)
 
+    h2 = [0.852, 0.832, 0.257, 0.233]
+    h2.each_with_index{|ev, i|
+      assert_in_delta(ev, fa.communalities[i], 0.001)
+    }
+    eigen1 = 2.175
+    assert_in_delta(eigen1, fa.eigenvalues[0], 0.001)
+    assert(fa.summary.size > 0)
+    fa = Statsample::Factor::PrincipalAxis.new(matrix, smc: false)
+
+    assert_raise RuntimeError do
+      fa.iterate
+    end
+  end
+
   def test_rotation_varimax
-    a = Matrix[ [ 0.4320,  0.8129,  0.3872]  ,
-      [0.7950, -0.5416,  0.2565]  ,
-      [0.5944,  0.7234, -0.3441],
-    [0.8945, -0.3921, -0.1863] ]
+    a = Matrix[[0.4320,  0.8129,  0.3872],
+               [0.7950, -0.5416,  0.2565],
+               [0.5944,  0.7234, -0.3441],
+               [0.8945, -0.3921, -0.1863]]
 
-    expected= Matrix[[-0.0204423,     0.938674,    -0.340334],
-      [0.983662, 0.0730206, 0.134997],
-      [0.0826106, 0.435975, -0.893379],
-    [0.939901, -0.0965213, -0.309596]]
-    varimax=Statsample::Factor::Varimax.new(a)
+    expected = Matrix[[-0.0204423,     0.938674,    -0.340334],
+                      [0.983662, 0.0730206, 0.134997],
+                      [0.0826106, 0.435975, -0.893379],
+                      [0.939901, -0.0965213, -0.309596]]
+    varimax = Statsample::Factor::Varimax.new(a)
     assert(!varimax.rotated.nil?, "Rotated shouldn't be empty")
     assert(!varimax.component_transformation_matrix.nil?, "Component matrix shouldn't be empty")
     assert(!varimax.h2.nil?, "H2 shouldn't be empty")
-    
-    assert_equal_matrix(expected,varimax.rotated,1e-6)
-    assert(varimax.summary.size>0)
-  end
-  
 
+    assert_equal_matrix(expected, varimax.rotated, 1e-6)
+    assert(varimax.summary.size > 0)
+  end
 end