require(File.expand_path(File.dirname(__FILE__) + '/helpers_tests.rb'))

# Exercises Statsample::Codification: building recodification templates
# (hash, Excel, YAML) from a dataset and applying a recodification
# dictionary to vectors and datasets.
class StatsampleCodificationTestCase < Minitest::Test
  # Builds the fixtures used by the tests:
  #   @dict - maps each raw token of 'v1' to its one-letter code.
  #   @ds   - dataset holding the single vector 'v1', whose values contain
  #           one or more comma-separated tokens.
  # NOTE(review): fixtures live in #initialize rather than #setup; this
  # relies on the runner building a fresh instance per test -- confirm
  # before moving it.
  def initialize(*args)
    v1 = %w(run walk,run walking running sleep sleeping,dreaming sleep,dream).to_vector
    @dict = {
      'run'      => 'r',
      'walk'     => 'w',
      'walking'  => 'w',
      'running'  => 'r',
      'sleep'    => 's',
      'sleeping' => 's',
      'dream'    => 'd',
      'dreaming' => 'd'
    }
    @ds = { 'v1' => v1 }.to_dataset
    super
  end

  # create_hash must return one entry per requested vector, mapping every
  # distinct token to itself.
  def test_create_hash
    expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort
    hash = Statsample::Codification.create_hash(@ds, ['v1'])
    assert_equal(['v1'], hash.keys)
    assert_equal(expected_keys_v1, hash['v1'].keys.sort)
    assert_equal(expected_keys_v1, hash['v1'].values.sort)
  end

  # create_excel must write a sheet with 'field', 'original' and 'recoded'
  # columns that excel_to_recoded_hash can read back.
  def test_create_excel
    # A fresh temporary directory gives a path that does not exist yet, has a
    # portable file name, and is removed (with the workbook) when the block
    # exits -- even if an assertion fails.
    Dir.mktmpdir('test_codification') do |dir|
      filename = File.join(dir, 'test_excel.xls')
      Statsample::Codification.create_excel(@ds, ['v1'], filename)

      field = (['v1'] * 8).to_vector
      keys = %w(dream dreaming run running sleep sleeping walk walking).to_vector
      ds = Statsample::Excel.read(filename)
      assert_equal(field, ds['field'])
      assert_equal(keys, ds['original'])
      assert_equal(keys, ds['recoded'])

      hash = Statsample::Codification.excel_to_recoded_hash(filename)
      assert_equal(keys.data, hash['v1'].keys.sort)
      assert_equal(keys.data, hash['v1'].values.sort)
    end
  end

  # create_yaml must reject an empty vector list, return a YAML document when
  # no IO is given, and write the same document to an IO when one is passed.
  def test_create_yaml
    assert_raise ArgumentError do
      Statsample::Codification.create_yaml(@ds, [])
    end

    expected_keys_v1 = %w(run walk walking running sleep sleeping dream dreaming).sort

    # Variant 1: the YAML text is taken from the return value.
    yaml_hash = Statsample::Codification.create_yaml(@ds, ['v1'])
    h = YAML.load(yaml_hash)
    assert_equal(['v1'], h.keys)
    assert_equal(expected_keys_v1, h['v1'].keys.sort)

    # Variant 2: the YAML text is written to the IO passed as third argument.
    tf = Tempfile.new('test_codification')
    begin
      Statsample::Codification.create_yaml(@ds, ['v1'], tf, Statsample::SPLIT_TOKEN)
      # Close and reopen so the content is read back from the start of the file.
      tf.close
      tf.open
      h = YAML.load(tf)
      assert_equal(['v1'], h.keys)
      assert_equal(expected_keys_v1, h['v1'].keys.sort)
    ensure
      # Always close and unlink the temp file, including on assertion failure.
      tf.close(true)
    end
  end

  # recode_vector must turn each value into the array of codes for its
  # tokens, without repeating a code, and keep nil values as nil.
  def test_recodification
    expected = [['r'], %w(w r), ['w'], ['r'], ['s'], %w(s d), %w(s d)]
    assert_equal(expected, Statsample::Codification.recode_vector(@ds['v1'], @dict))

    v2 = ['run', 'walk,dreaming', nil, 'walk,dream,dreaming,walking'].to_vector
    expected = [['r'], %w(w d), nil, %w(w d)]
    assert_equal(expected, Statsample::Codification.recode_vector(v2, @dict))
  end

  # recode_dataset_simple! must add a 'v1_recoded' vector with the codes
  # joined by commas and leave the original 'v1' vector different from it.
  def test_recode_dataset_simple
    Statsample::Codification.recode_dataset_simple!(@ds, 'v1' => @dict)
    expected_vector = ['r', 'w,r', 'w', 'r', 's', 's,d', 's,d'].to_vector
    assert_not_equal(expected_vector, @ds['v1'])
    assert_equal(expected_vector, @ds['v1_recoded'])
  end

  # recode_dataset_split! must add one 0/1 indicator vector per code, named
  # 'v1_<code>'.
  def test_recode_dataset_split
    Statsample::Codification.recode_dataset_split!(@ds, 'v1' => @dict)
    expected_by_code = {
      'r' => [1, 1, 0, 1, 0, 0, 0].to_vector,
      'w' => [0, 1, 1, 0, 0, 0, 0].to_vector,
      's' => [0, 0, 0, 0, 1, 1, 1].to_vector,
      'd' => [0, 0, 0, 0, 0, 1, 1].to_vector
    }
    expected_by_code.each do |k, expected|
      assert_equal(expected, @ds['v1_' + k], "Error on key #{k}")
    end
  end
end