require File.expand_path(File.dirname(__FILE__) + '/../test_helper')
require 'rbbt/tsv'
require 'rbbt/util/tmpfile'
require 'test/unit'

# Exercises TSV.open (and TSV as a Hash extension) against small inline
# fixtures written to temporary files.
#
# Fixture heredocs keep their content at column 0 on purpose: `<<-EOF` does not
# strip indentation, and leading blanks would become part of the first field.
class TestTSV < Test::Unit::TestCase

  # A plain Hash extended with TSV keeps behaving like a Hash.
  def test_extend
    a = {
      "one" => "1",
      "two" => "2"
    }

    a.extend TSV
    a.key_field = "Number"

    assert_equal "1", a["one"]
  end

  # Header parsing with the default key field and with an explicit one.
  def test_tsv_1
    content =<<-EOF
#Id    ValueA    ValueB    OtherID
row1    a|aa|aaa    b    Id1|Id2
row2    A    B    Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/)
      assert_equal ["a", "aa", "aaa"], tsv["row1"][0]
      assert_equal ["ValueA", "ValueB", "OtherID"], tsv.fields
      assert_equal :double, tsv.type
      assert_equal "Id", tsv.key_field

      tsv = TSV.open(filename, :sep => /\s+/, :key_field => "Id")
      assert_equal ["a", "aa", "aaa"], tsv["row1"][0]
      assert_equal ["ValueA", "ValueB", "OtherID"], tsv.fields
      assert_equal :double, tsv.type
      assert_equal "Id", tsv.key_field
    end
  end

  # A file without a `#` header line still loads as :double.
  def test_headerless
    content =<<-EOF
row1    a|aa|aaa    b    Id1|Id2
row2    A    B    Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/)
      assert_equal ["a", "aa", "aaa"], tsv["row1"][0]
      assert_equal :double, tsv.type
    end
  end

  # Positional field selection on a headerless file.
  def test_headerless_fields
    content =<<-EOF
row1    a|aa|aaa    b    Id1|Id2
row2    A    B    Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :fields => [1])
      assert_equal ["a", "aa", "aaa"], tsv["row1"][0]
      assert_equal :double, tsv.type
      assert_equal [%w(a aa aaa)], tsv["row1"]
    end
  end

  # With :persist the returned object is a TokyoCabinet::HDB.
  def test_tsv_persistence
    content =<<-EOF
#Id    ValueA    ValueB    OtherID
row1    a|aa|aaa    b    Id1|Id2
row2    A    B    Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :persist => true)
      assert_equal ["a", "aa", "aaa"], tsv["row1"][0]
      assert_equal ["ValueA", "ValueB", "OtherID"], tsv.fields
      assert_equal :double, tsv.type
      assert_equal "Id", tsv.key_field
      assert TokyoCabinet::HDB === tsv
    end
  end

  # :key_field and :fields given by position, by name, and mixed.
  def test_tsv_field_selection
    content =<<-EOF
#Id    ValueA    ValueB    OtherID
row1    a|aa|aaa    b    Id1|Id2
row2    A    B    Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :key_field => 0)
      assert_equal ["a", "aa", "aaa"], tsv["row1"][0]

      tsv = TSV.open(filename, :sep => /\s+/, :key_field => "Id")
      assert_equal ["a", "aa", "aaa"], tsv["row1"][0]

      tsv = TSV.open(filename, :sep => /\s+/, :fields => 1)
      assert_equal ["a", "aa", "aaa"], tsv["row1"].first

      tsv = TSV.open(filename, :sep => /\s+/, :fields => 2)
      assert_equal ["b"], tsv["row1"].first

      tsv = TSV.open(filename, :sep => /\s+/, :fields => [1,2])
      assert_equal ["a", "aa", "aaa"], tsv["row1"].first
      assert_equal ["b"], tsv["row1"].last

      tsv = TSV.open(filename, :sep => /\s+/, :fields => [1,3])
      assert_equal ["a", "aa", "aaa"], tsv["row1"].first
      assert_equal ["Id1", "Id2"], tsv["row1"].last

      tsv = TSV.open(filename, :sep => /\s+/, :key_field => "OtherID")
      assert_equal ["row1"], tsv["Id1"].first
      assert_equal tsv["Id2"], tsv["Id1"]

      tsv = TSV.open(filename, :sep => /\s+/, :key_field => "OtherID", :fields => "Id")
      assert_equal ["row1"], tsv["Id1"].first
      assert_equal tsv["Id2"], tsv["Id1"]

      tsv = TSV.open(filename, :sep => /\s+/, :key_field => "OtherID", :fields => ["ValueA",2])
      assert_equal ["a", "aa", "aaa"], tsv["Id1"].first
      assert_equal tsv["Id2"], tsv["Id1"]
    end
  end

  # :cast is applied to every value of a multi-valued cell.
  def test_tsv_cast
    content =<<-EOF
#Id    Value
row1    1|2|3
row2    4
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :cast => :to_i)
      assert_equal [1,2,3], tsv["row1"][0]

      tsv = TSV.open(filename, :sep => /\s+/, :cast => :to_f)
      assert_equal [1.0,2.0,3.0], tsv["row1"][0]
    end
  end

  # :single type yields scalar values, with and without key/field swapping.
  def test_tsv_single
    content =<<-EOF
#Id    Value
row1    1
row2    4
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :cast => :to_i, :type => :single, :fields => "Value")
      assert_equal 1, tsv["row1"]

      tsv = TSV.open(filename, :sep => /\s+/, :cast => :to_i, :type => :single, :fields => ["Value"])
      assert_equal 1, tsv["row1"]

      tsv = TSV.open(filename, :sep => /\s+/, :type => :single, :key_field => "Value", :fields => ["Id"])
      assert_equal "row1", tsv["1"]
    end
  end

  # A file declared :flat in its header, reopened as :single keyed by value.
  def test_tsv_single_from_flat
    content =<<-EOF
#: :type=:flat
#Id    Value
row1    1    2
row2    4
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :type => :single, :key_field => "Value", :fields => ["Id"])
      assert_equal "row1", tsv["1"]
    end
  end

  # With a serializer, the normal read is decoded while the read with the
  # extra `true` argument is a String.
  def test_tsv_serializer
    content =<<-EOF
#Id    Value
row1    1
row2    4
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :cast => :to_i, :type => :single, :serializer => :integer)
      assert_equal 1, tsv["row1"]
      assert String === tsv.send(:[], "row1", true)
    end
  end

  # Options declared in the `#:` preamble are honoured without passing them.
  def test_tsv_header_options
    content =<<-EOF
#: :sep=/\\s+/
#Id    Value
1    a
2    b
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename)
      assert_equal [["a"]], tsv["1"]
    end
  end

  # Tab-separated input opened with :persist and no explicit :sep.
  def test_tsv_fastimport
    content =<<-EOF
#Id    ValueA    ValueB    OtherID
row1    a|aa|aaa    b    Id1|Id2
row2    A    B    Id3
row3    AA    B    Id3
    EOF

    # Runs of spaces become tabs, so the fixture above must not be indented.
    TmpFile.with_file(content.gsub(/ +/,"\t")) do |filename|
      tsv = TSV.open(filename, :persist => true)
      assert_equal ["a", "aa", "aaa"], tsv["row1"][0]
      assert_equal ["ValueA", "ValueB", "OtherID"], tsv.fields
      assert_equal :double, tsv.type
      assert_equal "Id", tsv.key_field
    end
  end

  # :type taken from the preamble.
  def test_header_type
    content =<<-EOF
#: :sep=/\\s+/#:type=:single
#Id    Value
1    a
2    b
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename)
      assert_equal :single, tsv.type
      assert_equal "a", tsv["1"]
    end
  end

  # :cast taken from the preamble.
  def test_single_cast
    content =<<-EOF
#: :sep=/\\s+/#:type=:single#:cast=:to_i
#Id    Value
a    1
b    2
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename)
      assert_equal :single, tsv.type
      assert_equal 1, tsv["a"]
    end
  end

  # Re-keying by another column swaps key_field and fields.
  def test_key_field
    content =<<-EOF
#: :sep=/\\s+/#:type=:single
#Id    Value
a    1
b    2
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :key_field => "Value")
      assert_equal %w(Id), tsv.fields
      assert_equal "Value", tsv.key_field
      assert_equal "a", tsv["1"]
    end
  end

  # :fix rewrites a line before it is parsed.
  def test_fix
    content =<<-EOF
#: :sep=/\\s+/#:type=:single
#Id    Value
a    1
b    2
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :key_field => "Value", :fix => Proc.new{|l| l =~ /1/ ? "a 3" : l})
      assert_equal "a", tsv["3"]
    end
  end

  # :select with a predicate on the raw line.
  def test_select
    content =<<-EOF
#: :sep=/\\s+/#:type=:single
#Id    Value
a    1
b    2
    EOF

    TmpFile.with_file(content) do |filename|
      # `!~` rather than `! l =~ /1/`: unary `!` binds tighter than `=~`, so
      # the latter evaluates `(!l) =~ /1/` and never tests the line itself.
      tsv = TSV.open(filename, :key_field => "Value", :select => Proc.new{|l| l !~ /1/})
      assert(! tsv.include?("3"))
    end
  end

  # :grep with a pattern that also matches `#` lines.
  def test_grep
    content =<<-EOF
#: :sep=/\\s+/#:type=:single
#Id    Value
a    1
b    2
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :key_field => "Value", :grep => "#\\|2")
      assert(! tsv.include?("3"))
    end
  end

  # :grep with a bare pattern keeps the matching key and drops the others.
  def test_tsv_grep
    content =<<-EOF
#: :sep=/\\s+/#:type=:single
#Id    Value
a    1
b    2
b    3
d    22
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :key_field => "Value", :grep => "2")
      assert(tsv.include?("2"))
      assert(! tsv.include?("3"))
    end
  end

  # :invert_grep drops the matching lines instead of keeping them.
  def test_grep_invert
    content =<<-EOF
#: :sep=/\\s+/#:type=:single
#Id    Value
a    1
b    2
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :key_field => "Value", :tsv_grep => "2", :invert_grep => true)
      assert(! tsv.include?("2"))
      assert(tsv.include?("1"))
    end
  end

  # :grep on a file whose preamble carries extra options.
  def test_grep_header
    content =<<-EOF
#: :sep=/\\s+/#:type=:single#:namespace=Test
#Id    Value
a    7
a    1
b    2
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :key_field => "Value", :grep => '2')
      assert(tsv.include?("2"))
      assert(! tsv.include?("7"))
    end
  end

  # No assertions: passes as long as loading 'json' and opening the file
  # raise nothing.
  def test_json
    content =<<-EOF
#: :sep=/\\s+/#:type=:single
#Id    Value
a    1
b    2
    EOF

    require 'json'
    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :key_field => "Value", :grep => "#\\|2")
    end
  end

  # :flat over a single selected field.
  def test_flat_no_merge
    content =<<-EOF
#Id    ValueA    ValueB    OtherID
row1    a|aa|aaa    b    Id1|Id2
row2    A    B    Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :type => :flat, :fields => ["ValueA"])
      assert_equal ["a", "aa", "aaa"], tsv["row1"]
      assert_equal ["ValueA"], tsv.fields
      assert_equal :flat, tsv.type
      assert_equal "Id", tsv.key_field
    end
  end

  # :merge concatenates the values of repeated keys.
  def test_flat_merge
    content =<<-EOF
#Id    ValueA    ValueB    OtherID
row1    a|aa|aaa    b    Id1|Id2
row1    aaaa    b    Id1|Id2
row2    A    B    Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :merge => true, :type => :flat, :fields => ["ValueA"])
      assert_equal ["a", "aa", "aaa", "aaaa"], tsv["row1"]
    end
  end

  # :flat rows whose values are spread over several columns.
  def test_flat
    content =<<-EOF
#Id    ValueA
row1    a    aa    aaa
row2    b    bbb    bbbb    bb
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :merge => false, :type => :flat, :fields => ["ValueA"])
      assert_equal ["a", "aa", "aaa"], tsv["row1"]
    end
  end

  # Re-keying a :flat file by its values, merging rows that share a value.
  def test_flat_key
    content =<<-EOF
#Id    ValueA
row1    a    aa    aaa
row2    b    bbb    bbbb    bb    aa
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :merge => true, :type => :flat, :key_field => "ValueA")
      assert_equal ["row1"], tsv["a"]
      assert_equal ["row1", "row2"], tsv["aa"]
    end
  end

  # :zipped re-keying of a :double file.
  def test_zipped
    content =<<-EOF
#Id    ValueA    ValueB
row1    a|aa|aaa    b|bb|bbb
row2    a|aa|aaa    c|cc|ccc
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :merge => true, :type => :double, :key_field => "ValueA", :zipped => true)
      assert_equal [["row1", "row2"], ["b", "c"]], tsv["a"]
    end
  end

  # Row values know the key they were fetched with.
  def test_named_array_key
    content =<<-EOF
#Id    ValueA    ValueB    OtherID
row1    a|aa|aaa    b    Id1|Id2
row2    A    B    Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/)
      assert_equal "row1", tsv["row1"].key
    end
  end

  # Positional :key_field on a headerless file.
  def test_unnamed_key
    content =<<-EOF
row1    a|aa|aaa    b    Id1|Id2
row2    A    B    Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :key_field => 1)
      assert tsv.keys.include? "a"
    end
  end

  # Persisted :list of floats reports the :float_array serializer.
  def test_float_array
    content =<<-EOF
#Id    ValueA    ValueB    OtherID
row1    0.2    0.3    0
row2    0.1    4.5    0
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :persist => true, :type => :list, :cast => :to_f)
      assert_equal [0.2, 0.3, 0], tsv["row1"]
      assert_equal :float_array, tsv.serializer
    end
  end

  # Re-keying a header-declared :flat file by its value column.
  def test_flat_field_select
    content =<<-EOF
#: :type=:flat
#Id    Value
row1    a    aa    aaa
row2    b    bb    bbb
    EOF

    TmpFile.with_file(content) do |filename|
      assert TSV.open(filename, :sep => /\s+/, :key_field => "Value").include? "aa"
    end
  end

  # :flat with `|`-separated values, under several option combinations.
  def test_flat2
    content =<<-EOF
#: :type=:flat
#Id    Value
row1    a|aa|aaa
row2    A|AA|AAA
    EOF

    TmpFile.with_file(content) do |filename|
      assert TSV.open(filename, :sep => /\s+/, :type => :flat).include? "row1"
      assert TSV.open(filename, :sep => /\s+/, :type => :flat)["row1"].include? "a"
      assert TSV.open(filename, :sep => /\s+/, :type => :flat, :key_field => "Id")["row1"].include? "a"
      assert TSV.open(filename, :sep => /\s+/, :type => :flat, :key_field => "Id", :fields => ["Value"])["row1"].include? "a"
    end
  end

  # Reading a :double-style file as :flat, keyed by one column and valued by
  # another.
  def test_tsv_flat_double
    content =<<-EOF
#Id    ValueA    ValueB    OtherID
row1    a|aa|aaa    b    Id1|Id2
row2    A    B    Id3
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :type => :flat, :key_field => "ValueA", :fields => ["OtherID"], :merge => true)
      assert tsv["aaa"].include? "Id1"
      assert tsv["aaa"].include? "Id2"
    end
  end

  # Header-declared :flat file reopened as :single keyed by value.
  def test_flat2single
    content =<<-EOF
#: :type=:flat
#Id    Value
row1    a    aa    aaa
row2    A    AA    AAA
    EOF

    TmpFile.with_file(content) do |filename|
      assert TSV.open(filename, :sep => /\s+/, :type => :single, :key_field => "Value").include? "aaa"
    end
  end

  # Persisted TSV sharded by the last character of each key.
  def test_shard
    shard_function = Proc.new do |key|
      key[-1]
    end

    tsv = datafile_test('identifiers').tsv :persist => true, :shard_function => shard_function
    assert_equal 10000, tsv.keys.length + 2
  end

  # Merging on a non-unique key field, using a data file from one developer's
  # checkout.
  def test_merge_key_field
    path = '/home/mvazquezg/git/workflows/genomics/share/gene_ages'

    # The fixture lives outside the repository; without it there is nothing to
    # check, so return early instead of erroring on every other machine.
    return unless File.exist?(path)

    assert TSV.open(path, :key_field => "FamilyAge", :merge => true, :persist => false, :zipped => true, :type => :double).values.first.length > 1
  end

  # :flat + :merge when re-keying by a multi-valued column.
  #
  # Named distinctly from test_flat_merge: two methods with the same name
  # would leave only the later definition, and the earlier test would never
  # run.
  def test_flat_merge_key_field
    content =<<-EOF
#Id    ValueA    ValueB    OtherID
row1    a|aa|aaa    b    Id1|Id2
row2    A|aAa    B    Id3
row3    AA    BB|BBB    Id3|Id2
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :key_field => "OtherID", :fields => ["ValueA"], :type => :flat, :merge => true)
      assert_equal %w(A aAa AA).sort, tsv["Id3"].sort
      assert_equal %w(a aa aaa AA).sort, tsv["Id2"].sort
    end
  end

  # Re-keying by a `|`-separated column when reading as :flat.
  def test_flat_key_field_double
    content =<<-EOF
#Id    ValueA
row1    a|aa|aaa
row2    b|bbb|bbbb|bb
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :key_field => "ValueA", :fields => ["Id"], :type => :flat)
      assert tsv.include? 'aa'
    end
  end

  # Re-keying a header-declared :flat file by its value column.
  def test_flat_key_field
    content =<<-EOF
#: :type=:flat
#Id    ValueA
row1    a    aa    aaa
row2    b    bbb    bbbb    bb
    EOF

    TmpFile.with_file(content) do |filename|
      tsv = TSV.open(filename, :sep => /\s+/, :key_field => "ValueA", :fields => ["Id"], :type => :flat)
      assert tsv.include? 'aa'
    end
  end
end