test/rbbt/util/test_tsv.rb in rbbt-util-1.1.0 vs test/rbbt/util/test_tsv.rb in rbbt-util-1.2.1

- old
+ new

@@ -10,12 +10,12 @@ row2 A B EOF TmpFile.with_file(content) do |filename| data = {} - key_field, fields = TSV.parse(data, File.open(filename), :sep => /\s+/, :keep_empty => true) - assert_equal ["ValueA", "ValueB", "Comment"], fields + data, extra = TSV.parse(File.open(filename), :sep => /\s+/, :keep_empty => true) + assert_equal ["ValueA", "ValueB", "Comment"], extra[:fields] assert_equal ["c"], data["row1"][2] assert_equal [""], data["row2"][2] end end @@ -25,11 +25,11 @@ row1 a b c row2 A B C EOF TmpFile.with_file(content) do |filename| - tsv = TSV.new(File.open(filename), :sep => /\s/) + tsv = TSV.new(File.open(filename), :double, :sep => /\s/) assert_equal [["a"],["c"]], tsv.reorder(:main, ["ValueA", "Comment"])["row1"] end end def test_headers @@ -62,13 +62,13 @@ row2 A B EOF TmpFile.with_file(content) do |filename| data = {} - key_field, fields = TSV.parse(data, File.open(filename), :sep => /\s+/) - assert_equal "Id", key_field - assert_equal ["ValueA", "ValueB"], fields + data, extra = TSV.parse(File.open(filename), :sep => /\s+/) + assert_equal "Id", extra[:key_field] + assert_equal ["ValueA", "ValueB"], extra[:fields] assert_equal ["a", "aa", "aaa"], data["row1"][0] end end def test_large @@ -77,11 +77,11 @@ row1 a|aa|aaa b Id1|Id2 row2 A B Id3 EOF TmpFile.with_file(content) do |filename| - tsv = TSV.new(File.open(filename), :sep => /\s+/, :native => "OtherID", :large => true) + tsv = TSV.new(File.open(filename), :double, :sep => /\s+/, :key => "OtherID", :large => true) assert_equal "OtherID", tsv.key_field assert_equal ["Id", "ValueA", "ValueB"], tsv.fields assert_equal ["a", "aa", "aaa"], tsv["Id2"][1] end end @@ -92,11 +92,11 @@ row1 a|aa|aaa b Id1|Id2 row2 A B Id3 EOF TmpFile.with_file(content) do |filename| - tsv = TSV.new(File.open(filename), :sep => /\s+/, :native => "OtherID") + tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID") assert_equal "OtherID", tsv.key_field assert_equal ["Id", "ValueA", "ValueB"], tsv.fields assert_equal ["a", "aa", "aaa"], tsv["Id1"][1] assert_equal ["a", "aa", "aaa"], tsv["Id2"][1] end @@ -109,11 +109,11 @@ row2 A B Id3 row3 a C Id4 EOF TmpFile.with_file(content) do |filename| - tsv = TSV.open_file(filename + '#:sep=/\s+/#:native=OtherID') + tsv = TSV.open_file(filename + '#:sep=/\s+/#:key=OtherID') assert_equal "OtherID", tsv.key_field assert_equal ["Id", "ValueA", "ValueB"], tsv.fields assert_equal ["a", "aa", "aaa"], tsv["Id1"][1] assert_equal ["a", "aa", "aaa"], tsv["Id2"][1] end @@ -127,13 +127,13 @@ row1 a|aa|aaa b Id1|Id2 row2 A B Id3 EOF TmpFile.with_file(content) do |filename| - tsv = TSV.new(File.open(filename), :sep => /\s+/, :native => "OtherID", :extra => 2) + tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID", :others => 2) assert_equal ["b"], tsv["Id2"][0] - tsv = TSV.new(File.open(filename), :sep => /\s+/, :native => "OtherID", :extra => 'ValueB') + tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID", :others => 'ValueB') assert_equal ["b"], tsv["Id2"][0] end end def test_case @@ -142,11 +142,11 @@ row1 a|aa|aaa b Id1|Id2 row2 A B Id3 EOF TmpFile.with_file(content) do |filename| - tsv = TSV.new(File.open(filename), :sep => /\s+/, :native => "OtherID", :case_insensitive => true) + tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID", :case_insensitive => true) assert_equal "OtherID", tsv.key_field assert_equal ["Id", "ValueA", "ValueB"], tsv.fields assert_equal ["a", "aa", "aaa"], tsv["id1"][1] assert_equal ["a", "aa", "aaa"], tsv["Id2"][1] end @@ -158,19 +158,20 @@ row1 a|aa|aaa b Id1|Id2 row2 A B Id3 EOF TmpFile.with_file(content) do |filename| - tsv = TSV.new(filename, :sep => /\s+/, :native => "OtherID", :persistence => true) + tsv = TSV.new(filename, :sep => /\s+/, :key => "OtherID", :persistence => true) assert_equal ["Id", "ValueA", "ValueB"], tsv.fields tsv['Id4'] = [["row3"],["aA"],["bB","bbBB"]] assert_equal ["aA"], tsv["Id4"][1] - tsv = TSV.new(File.open(filename), :sep => /\s+/, :native => "OtherID", :persistence => true) + tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID", :persistence => true) assert_equal ["Id", "ValueA", "ValueB"], tsv.fields + assert_equal ["aA"], tsv["Id4"][1] assert_equal [["b"],["B"]], tsv.values_at("Id1", "Id3").collect{|values| values[2]} - tsv = TSV.new(File.open(filename), :sep => /\s+/, :native => "OtherID", :persistence => true, :flatten => true) + tsv = TSV.new(File.open(filename), :flat, :sep => /\s+/, :key => "OtherID", :persistence => false) assert(tsv["Id3"].include? "A") end end def test_index_headerless @@ -179,11 +180,11 @@ row2 A B Id3 EOF TmpFile.with_file(content) do |filename| tsv = TSV.new(File.open(filename), :sep => /\s+/) - index = tsv.index(:case_insensitive => true, :field => 2) + index = tsv.index(:case_insensitive => true, :target => 2) assert index["row1"].include? "Id1" end end @@ -193,18 +194,18 @@ row1 a|aa|aaa b Id1|Id2 row2 A B Id3 EOF TmpFile.with_file(content) do |filename| - tsv = TSV.new(File.open(filename), :sep => /\s+/, :native => "OtherID", :persistence => false) + tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID", :persistence => false) index = tsv.index(:case_insensitive => true) assert index["row1"].include? "Id1" assert_equal "OtherID", index.key_field end TmpFile.with_file(content) do |filename| - tsv = TSV.new(File.open(filename), :sep => /\s+/, :native => "OtherID") + tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID") index = tsv.index(:case_insensitive => true) assert index["row1"].include? "Id1" assert_equal "OtherID", index.key_field end end @@ -215,19 +216,19 @@ row1 a|aa|aaa b|A Id1 row2 A a|B Id3 EOF TmpFile.with_file(content) do |filename| - tsv = TSV.new(File.open(filename), :sep => /\s+/, :native => "OtherID", :persistence => true) + tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID", :persistence => true) index = tsv.index(:case_insensitive => false, :order => true) assert_equal "Id1", index['a'].first assert_equal "Id3", index['A'].first assert_equal "OtherID", index.key_field end TmpFile.with_file(content) do |filename| - tsv = TSV.new(File.open(filename), :sep => /\s+/, :native => "OtherID") + tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID") index = tsv.index(:case_insensitive => true) assert index["row1"].include? "Id1" assert_equal "OtherID", index.key_field end end @@ -238,11 +239,11 @@ row1 a|aa|aaa b Id1|Id2 row2 A B Id3 EOF TmpFile.with_file(content) do |filename| - tsv = TSV.new(File.open(filename), :sep => /\s+/, :native => "OtherID", :persistence => true) + tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID", :persistence => true) index = tsv.index(:case_insensitive => true) assert index.values_at(*["row1"]).first.include? "Id1" end end @@ -252,11 +253,11 @@ row1 a|aa|aaa b Id1|Id2 row2 A B Id3 EOF TmpFile.with_file(content) do |filename| - tsv = TSV.new(File.open(filename), :sep => /\s+/, :native => "OtherID", :case_insensitive => true) + tsv = TSV.new(File.open(filename), :sep => /\s+/, :key => "OtherID", :case_insensitive => true) assert_equal "OtherID", tsv.key_field assert_equal ["Id", "ValueA", "ValueB"], tsv.fields assert_equal ["a", "aa", "aaa"], tsv["id1"][1] assert_equal ["a", "aa", "aaa"], tsv["Id2"]["ValueA"] @@ -271,21 +272,36 @@ def test_helpers begin require 'rbbt/sources/organism' filename = File.join(Organism.datadir('Sce'), 'identifiers') missing = true - index = TSV.index(filename, :persistence => true, :native => "Associated Gene Name") + index = TSV.index(filename, :persistence => true, :key => "Associated Gene Name") assert index['1020'].include? 'CDK5' - index = TSV.index(filename, :persistence => true, :native => "Associated Gene Name") + index = TSV.index(filename, :persistence => true, :key => "Associated Gene Name") assert index[[nil,'1020']].include? 'CDK5' - index = TSV.index(filename, :persistence => true, :native => "Associated Gene Name") + index = TSV.index(filename, :persistence => true, :key => "Associated Gene Name") assert index[['MISSING','1020']].include? 'CDK5' rescue Exception end end + def test_grep + content =<<-EOF +#Id ValueA ValueB OtherID +row1 a|aa|aaa b Id1|Id2 +row2 A B Id3 + EOF + TmpFile.with_file(content) do |filename| + tsv = TSV.new(File.open(filename), :sep => /\s+/, :grep => %w(row1)) + assert tsv.keys.include? "row1" + assert( ! tsv.keys.include?("row2")) + end + end + + + def test_sort content =<<-EOF #Id ValueA ValueB OtherID row1 a|aa|aaa b Id1|Id2 row2 A B Id3 @@ -314,11 +330,32 @@ tsv = TSV.new(File.open(filename), :sep => /\s+/) assert_equal content, tsv.to_s end end + def test_to_s_ordered + content =<<-EOF +#Id ValueA ValueB OtherID +row1 a|aa|aaa b Id1|Id2 +row2 A B Id3 + EOF + content2 =<<-EOF +#Id ValueA ValueB OtherID +row2 A B Id3 +row1 a|aa|aaa b Id1|Id2 + EOF + + + TmpFile.with_file(content) do |filename| + tsv = TSV.new(File.open(filename), :sep => /\s+/) + assert_equal content, tsv.to_s(%w(row1 row2)) + assert_not_equal content, tsv.to_s(%w(row2 row1)) + assert_equal content2, tsv.to_s(%w(row2 row1)) + end + end + def test_smart_merge_single content1 =<<-EOF #Id ValueA ValueB row1 a|aa|aaa b row2 A B @@ -330,15 +367,15 @@ C B Id3 EOF tsv1 = tsv2 = nil TmpFile.with_file(content1) do |filename| - tsv1 = TSV.new(File.open(filename), :sep => /\s+/, :unique => true) + tsv1 = TSV.new(File.open(filename), :list, :sep => /\s+/) end TmpFile.with_file(content2) do |filename| - tsv2 = TSV.new(File.open(filename), :sep => /\s+/, :unique => true) + tsv2 = TSV.new(File.open(filename), :list, :sep => /\s+/) end tsv1.smart_merge tsv2, "ValueB" assert_equal "C", tsv1["row2"]["ValueC"] @@ -488,10 +525,11 @@ assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"] assert_equal %w(C), tsv1["row2"]["ValueC"] assert_equal %w(a aa aaa aaaa), tsv1["row1"]["ValueA"] end + def test_smart_merge_common_fields content1 =<<-EOF #Id ValueA ValueB row1 a|aa|aaa b row2 A B @@ -647,10 +685,24 @@ assert_equal ["Pref:A"], tsv["row2"]["ValueA"] end end + def test_break_with_fix + content =<<-EOF +#Id ValueA ValueB OtherID +row1 a|aa|aaa b Id1|Id2 +row2 A B Id3 +row3 a C Id4 + EOF + + TmpFile.with_file(content) do |filename| + tsv = TSV.new(File.open(filename), :sep => /\s+/, :fix => proc{|l| l =~ /^row2/? nil : l}) + assert_equal %w(row1), tsv.keys + end + end + def test_open_stringoptions content =<<-EOF #Id ValueA ValueB OtherID row1 a|aa|aaa b Id1|Id2 row2 A B Id3 @@ -676,11 +728,11 @@ row3 a C Id4 EOF TmpFile.with_file(content) do |filename| tsv = TSV.new(filename + '#:sep=/\s+/') - assert tsv.list + assert tsv.type == :double new = tsv.select %w(b Id4) assert_equal %w(row1 row3).sort, new.keys new = tsv.select "ValueB" => %w(b Id4) @@ -690,12 +742,12 @@ assert_equal %w(row1 row3).sort, new.keys new = tsv.select "ValueB" => /b|Id4/ assert_equal %w(row1).sort, new.keys - tsv = TSV.new(filename + '#:sep=/\s+/#:unique') - assert ! tsv.list + tsv = TSV.new(filename + '#:sep=/\s+/#:type=:flat') + assert tsv.type != :double new = tsv.select %w(b Id4) end end @@ -725,38 +777,31 @@ TmpFile.with_file(content) do |filename| tsv = TSV.new(filename + '#:sep=/\s+/') tsv.add_field "Str length" do |k,v| (v.flatten * " ").length end - assert tsv.fields.include?("Str length") end end - def test_tsv_cache - content =<<-EOF + def test_cast + content =<<-EOF #Id LetterValue#ValueA LetterValue#ValueB OtherID row1 a|aa|aaa b Id1|Id2 row2 A B Id3 row3 a C Id4 EOF TmpFile.with_file(content) do |filename| - tsv = CacheHelper.tsv_cache('test_tsv_cache', filename) do - TSV.new(filename + '#:sep=/\s+/') - end - tsv - - tsv1 = CacheHelper.tsv_cache('test_tsv_cache', filename) do - assert false - end + tsv = TSV.new(filename + '#:sep=/\s+/#:cast="to_sym"') + assert tsv['row1']["OtherID"].include?(:Id1) + assert ! tsv['row1']["OtherID"].include?("Id1") - assert_equal tsv.fields, tsv1.fields - - CacheHelper.clean 'test_tsv_cache' + tsv = TSV.new(filename + '#:sep=/\s+/') + assert tsv['row1']["OtherID"].include?("Id1") + assert ! tsv['row1']["OtherID"].include?(:Id1) end - - end + end end