test/test_codification.rb in statsample-0.5.1 vs test/test_codification.rb in statsample-0.6.0
- old
+ new
@@ -1,60 +1,81 @@
$:.unshift(File.dirname(__FILE__)+'/../lib/')
require 'statsample'
require 'tempfile'
+require 'tmpdir'
require 'test/unit'
class StatsampleCodificationTestCase < Test::Unit::TestCase
- def initialize(*args)
- v1=%w{run walk,run walking running sleep sleeping,dreaming sleep,dream}.to_vector
- @dict={'run'=>'r','walk'=>'w','walking'=>'w','running'=>'r','sleep'=>'s', 'sleeping'=>'s','dream'=>'d','dreaming'=>'d'}
- @ds={"v1"=>v1}.to_dataset
- super
- end
- def test_create_yaml
- assert_raise ArgumentError do
- Statsample::Codification.create_yaml(@ds,[])
- end
- expected_keys_v1=%w{run walk walking running sleep sleeping dream dreaming}.sort
- yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'])
- h=YAML::load(yaml_hash)
- assert_equal(['v1'],h.keys)
- assert_equal(expected_keys_v1,h['v1'].keys.sort)
- tf = Tempfile.new("test_codification")
- yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'],Statsample::SPLIT_TOKEN,tf)
- tf.close
- tf.open
- h=YAML::load(tf)
- assert_equal(['v1'],h.keys)
- assert_equal(expected_keys_v1,h['v1'].keys.sort)
- tf.close(true)
+ def initialize(*args)
+ v1=%w{run walk,run walking running sleep sleeping,dreaming sleep,dream}.to_vector
+ @dict={'run'=>'r','walk'=>'w','walking'=>'w','running'=>'r','sleep'=>'s', 'sleeping'=>'s', 'dream'=>'d', 'dreaming'=>'d'}
+ @ds={"v1"=>v1}.to_dataset
+ super
+ end
+ def test_create_hash
+ expected_keys_v1=%w{run walk walking running sleep sleeping dream dreaming}.sort
+ hash=Statsample::Codification.create_hash(@ds,['v1'])
+ assert_equal(['v1'],hash.keys)
+ assert_equal(expected_keys_v1,hash['v1'].keys.sort)
+ assert_equal(expected_keys_v1,hash['v1'].values.sort)
+ end
+ def test_create_excel
+ filename=Dir::tmpdir+"/test_excel"+Time.now().to_s+".xls"
+ #filename = Tempfile.new("test_codification_"+Time.now().to_s)
+ Statsample::Codification.create_excel(@ds, ['v1'], filename)
+ field=(["v1"]*8).to_vector
+ keys=%w{dream dreaming run running sleep sleeping walk walking}.to_vector
+ ds=Statsample::Excel.read(filename)
+ assert_equal(field, ds['field'])
+ assert_equal(keys, ds['original'])
+ assert_equal(keys, ds['recoded'])
+ hash=Statsample::Codification.excel_to_recoded_hash(filename)
+ assert_equal(keys.data, hash['v1'].keys.sort)
+ assert_equal(keys.data, hash['v1'].values.sort)
+
+ end
+ def test_create_yaml
+ assert_raise ArgumentError do
+ Statsample::Codification.create_yaml(@ds,[])
end
- def test_recodification
- expected=[['r'],['w','r'],['w'],['r'],['s'],['s','d'], ['s','d']]
- assert_equal(expected,Statsample::Codification.recode_vector(@ds['v1'],@dict))
- v2=['run','walk,dreaming',nil,'walk,dream,dreaming,walking'].to_vector
- expected=[['r'],['w','d'],nil,['w','d']]
- assert_equal(expected,Statsample::Codification.recode_vector(v2,@dict))
- end
- def test_recode_dataset_simple
- yaml=YAML::dump({'v1'=>@dict})
- Statsample::Codification.recode_dataset_simple!(@ds,yaml)
- expected_vector=['r','w,r','w','r','s','s,d', 's,d'].to_vector
- assert_not_equal(expected_vector,@ds['v1'])
- assert_equal(expected_vector,@ds['v1_recoded'])
- end
- def test_recode_dataset_split
- yaml=YAML::dump({'v1'=>@dict})
- Statsample::Codification.recode_dataset_split!(@ds,yaml)
- e={}
- e['r']=[1,1,0,1,0,0,0].to_vector
- e['w']=[0,1,1,0,0,0,0].to_vector
- e['s']=[0,0,0,0,1,1,1].to_vector
- e['d']=[0,0,0,0,0,1,1].to_vector
- e.each{|k,expected|
- assert_equal(expected,@ds['v1_'+k],"Error on key #{k}")
-
- }
- end
+ expected_keys_v1=%w{run walk walking running sleep sleeping dream dreaming}.sort
+ yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'])
+ h=YAML::load(yaml_hash)
+ assert_equal(['v1'],h.keys)
+ assert_equal(expected_keys_v1,h['v1'].keys.sort)
+ tf = Tempfile.new("test_codification")
+ yaml_hash=Statsample::Codification.create_yaml(@ds,['v1'],tf, Statsample::SPLIT_TOKEN)
+ tf.close
+ tf.open
+ h=YAML::load(tf)
+ assert_equal(['v1'],h.keys)
+ assert_equal(expected_keys_v1,h['v1'].keys.sort)
+ tf.close(true)
+ end
+ def test_recodification
+ expected=[['r'],['w','r'],['w'],['r'],['s'],['s','d'], ['s','d']]
+ assert_equal(expected,Statsample::Codification.recode_vector(@ds['v1'],@dict))
+ v2=['run','walk,dreaming',nil,'walk,dream,dreaming,walking'].to_vector
+ expected=[['r'],['w','d'],nil,['w','d']]
+ assert_equal(expected,Statsample::Codification.recode_vector(v2,@dict))
+ end
+ def test_recode_dataset_simple
+ Statsample::Codification.recode_dataset_simple!(@ds,{'v1'=>@dict})
+ expected_vector=['r','w,r','w','r','s','s,d', 's,d'].to_vector
+ assert_not_equal(expected_vector,@ds['v1'])
+ assert_equal(expected_vector,@ds['v1_recoded'])
+ end
+ def test_recode_dataset_split
+ Statsample::Codification.recode_dataset_split!(@ds,{'v1'=>@dict})
+ e={}
+ e['r']=[1,1,0,1,0,0,0].to_vector
+ e['w']=[0,1,1,0,0,0,0].to_vector
+ e['s']=[0,0,0,0,1,1,1].to_vector
+ e['d']=[0,0,0,0,0,1,1].to_vector
+ e.each{|k,expected|
+ assert_equal(expected,@ds['v1_'+k],"Error on key #{k}")
+
+ }
+ end
end
\ No newline at end of file