require File.join(File.expand_path(File.dirname(__FILE__)), '../..', 'test_helper.rb')
require 'rbbt/tsv'
require 'rbbt/tsv/attach'
require 'rbbt'

class TestAttach < Test::Unit::TestCase
  # Exercises TSV#attach_same_key between two tables that share their row keys,
  # first with a :double target and then with a :list target.
  def test_attach_same_key
    base_content =<<-EOF
#Id    ValueA    ValueB
row1    a|aa|aaa    b
row2    A    B
    EOF

    other_content =<<-EOF
#ID    ValueB    OtherID
row1    b    Id1|Id2
row3    B    Id3
    EOF

    # Dumps +content+ into a temporary file and parses it with the given TSV type.
    parse = lambda do |content, type|
      parsed = nil
      TmpFile.with_file(content) do |path|
        parsed = TSV.open(File.open(path), type, :sep => /\s+/)
      end
      parsed
    end

    # :double target, naming the field to bring over
    base = parse.call(base_content, :double)
    other = parse.call(other_content, :double)
    base.attach_same_key(other, "OtherID")

    assert_equal(%w(ValueA ValueB OtherID), base.fields)
    assert_equal(%w(Id1 Id2), base["row1"]["OtherID"])

    # fresh :double target, no field named explicitly
    base = parse.call(base_content, :double)
    base.attach_same_key(other)

    assert_equal(%w(ValueA ValueB OtherID), base.fields)

    # :list target: the assertion expects a single value instead of an array
    base = parse.call(base_content, :list)
    other = parse.call(other_content, :double)
    base.attach_same_key(other, "OtherID")

    assert_equal(%w(ValueA ValueB OtherID), base.fields)
    assert_equal("Id1", base["row1"]["OtherID"])
  end

  # Exercises TSV#attach_source_key: the attached table is keyed by "ValueB",
  # which is a field of the target table.
  def test_attach_source_field
    base_content =<<-EOF
#Id    ValueA    ValueB
row1    a|aa|aaa    b
row2    A    B
    EOF

    other_content =<<-EOF
#ValueB    OtherID
b    Id1|Id2
B    Id3
    EOF

    # Dumps +content+ into a temporary file and parses it with the given TSV type.
    parse = lambda do |content, type|
      parsed = nil
      TmpFile.with_file(content) do |path|
        parsed = TSV.open(File.open(path), type, :sep => /\s+/)
      end
      parsed
    end

    # :double target
    base = parse.call(base_content, :double)
    other = parse.call(other_content, :double)
    base.attach_source_key(other, "ValueB")

    assert_equal(%w(ValueA ValueB OtherID), base.fields)
    assert_equal(%w(Id1 Id2), base["row1"]["OtherID"])

    # :list target, reusing the same attached table
    base = parse.call(base_content, :list)
    base.attach_source_key(other, "ValueB")

    assert_equal(%w(ValueA ValueB OtherID), base.fields)
    assert_equal("Id1", base["row1"]["OtherID"])
  end

  # Exercises TSV#attach_index: the attached table is keyed by "ValueE", and a
  # separate :flat table mapping Id to ValueE is passed along as the index.
  def test_attach_index
    base_content =<<-EOF
#Id    ValueA    ValueB
row1    a|aa|aaa    b
row2    A    B
    EOF

    other_content =<<-EOF
#ValueE    OtherID
e    Id1|Id2
E    Id3
    EOF

    index_content =<<-EOF
#Id    ValueE
row1    e
row2    E
    EOF

    # Dumps +content+ into a temporary file and parses it with the given TSV type.
    parse = lambda do |content, type|
      parsed = nil
      TmpFile.with_file(content) do |path|
        parsed = TSV.open(File.open(path), type, :sep => /\s+/)
      end
      parsed
    end

    # :double target
    base = parse.call(base_content, :double)
    other = parse.call(other_content, :double)
    id_index = parse.call(index_content, :flat)
    base.attach_index(other, id_index)

    assert_equal(%w(ValueA ValueB OtherID), base.fields)
    assert_equal(%w(Id1 Id2), base["row1"]["OtherID"])

    # :list target, same attached table and index
    base = parse.call(base_content, :list)
    base.attach_index(other, id_index)

    assert_equal(%w(ValueA ValueB OtherID), base.fields)
    assert_equal("Id1", base["row1"]["OtherID"])
  end

  # Exercises the generic TSV#attach against a table that shares the row key
  # and against a table keyed by one of the target's fields ("ValueB").
  def test_attach
    base_content =<<-EOF
#Id    ValueA    ValueB
row1    a|aa|aaa    b
row2    A    B
    EOF

    same_key_content =<<-EOF
#Id    ValueB    OtherID
row1    b    Id1|Id2
row3    B    Id3
    EOF

    by_field_content =<<-EOF
#ValueB    OtherID
b    Id1|Id2
B    Id3
    EOF

    # Dumps +content+ into a temporary file and parses it as a :double TSV.
    parse_double = lambda do |content|
      parsed = nil
      TmpFile.with_file(content) do |path|
        parsed = TSV.open(File.open(path), :double, :sep => /\s+/)
      end
      parsed
    end

    base = parse_double.call(base_content)
    same_key = parse_double.call(same_key_content)
    by_field = parse_double.call(by_field_content)

    # attached table shares the "Id" key; only "OtherID" is requested
    base.attach(same_key, :fields => "OtherID")

    assert_equal(%w(ValueA ValueB OtherID), base.fields)
    assert_equal(%w(Id1 Id2), base["row1"]["OtherID"])

    # fresh target; attached table is keyed by "ValueB"
    base = parse_double.call(base_content)
    base.attach(by_field)

    assert_equal(%w(ValueA ValueB OtherID), base.fields)
    assert_equal(%w(Id1 Id2), base["row1"]["OtherID"])
  end

  # Attaches a table keyed by "ValueE" (absent from the target) after giving the
  # attached table an identifiers file that maps Id to ValueE. All three
  # contents are registered as string resources under Rbbt.tmp.test.
  def test_attach_using_index
    content1 =<<-EOF
#: :sep=/\\s+/
#Id    ValueA    ValueB
row1    a|aa|aaa    b
row2    A    B
    EOF

    content2 =<<-EOF
#: :sep=/\\s+/
#ValueE    OtherID
e    Id1|Id2
E    Id3
    EOF

    content_index =<<-EOF
#: :sep=/\\s+/
#Id    ValueE
row1    e
row2    E
    EOF

    Rbbt.claim Rbbt.tmp.test.test1.data, :string, content1
    Rbbt.claim Rbbt.tmp.test.test2.data, :string, content2
    Rbbt.claim Rbbt.tmp.test.test2.identifiers, :string, content_index

    tsv1 = Rbbt.tmp.test.test1.data.tsv :double,  :sep => /\s+/
    tsv2 = Rbbt.tmp.test.test2.data.tsv :double,  :sep => /\s+/

    tsv2.identifiers = Rbbt.tmp.test.test2.identifiers.produce.find

    tsv1.attach tsv2, :fields => ["OtherID"]

    # Expected value goes first, as in every other assertion of this file, so
    # that a failure message reports expected/actual the right way round.
    assert_equal %w(ValueA ValueB OtherID), tsv1.fields
    assert_equal %w(Id1 Id2), tsv1["row1"]["OtherID"]
  end

  # Attaches a table (OtherID, ValueE) that shares no field with the target
  # (Id, ValueA, ValueB); an Id -> ValueE identifiers table is set on the
  # target before attaching.
  def test_find_path
    target_content =<<-EOF
#: :sep=/\\s+/#:case_insensitive=false
#Id    ValueA    ValueB
row1    a|aa|aaa    b
row2    A    B
    EOF

    addition_content =<<-EOF
#: :sep=/\\s+/#:case_insensitive=false
#OtherID ValueE
Id1|Id2    e    
Id3        E    
    EOF

    id_content =<<-EOF
#: :sep=/\\s+/#:case_insensitive=false
#Id    ValueE
row1    e
row2    E
    EOF

    target = addition = id_table = nil

    # Each table is opened through a Path rather than an open File.
    TmpFile.with_file(target_content) { |path| target = TSV.open(Path.setup(path), :key => "Id") }
    TmpFile.with_file(addition_content) { |path| addition = TSV.open(Path.setup(path), :double) }
    TmpFile.with_file(id_content) { |path| id_table = TSV.open(Path.setup(path), :flat, :sep => /\s+/) }

    target.identifiers = id_table
    target.attach(addition)

    assert_equal(%w(ValueA ValueB ValueE), target.fields)
  end

  # Merges two space-separated streams with TSV.merge_different_fields and
  # compares the file it writes against the expected text.
  def test_merge_different_rows
    left_rows =<<-EOF
row6 dd dd ee
row1 a b c
row2 A B C
row3 1 2 3
    EOF
    right_rows =<<-EOF
row20 rr rr
row1 d e
row2 D E
row4 x y z
    EOF
    expected =<<-EOF
row1 a b c d e
row2 A B C D E
row20    rr rr
row3 1 2 3  
row4    x y z
row6 dd dd ee  
    EOF

    TmpFile.with_file do |merged_path|
      left_io = StringIO.new(left_rows)
      right_io = StringIO.new(right_rows)
      TSV.merge_different_fields(left_io, right_io, merged_path, " ")

      assert_equal(expected, Open.read(merged_path))
    end
  end

  # Same scenario as the stream-based merge test, but going through the
  # instance method TSV#merge_different_fields on two parsed tables.
  def test_merge_different_rows_tsv
    left_rows =<<-EOF
row6 dd dd ee
row1 a b c
row2 A B C
row3 1 2 3
    EOF
    right_rows =<<-EOF
row20 rr rr
row1 d e
row2 D E
row4 x y
    EOF
    expected =<<-EOF
row1 a b c d e
row2 A B C D E
row20    rr rr
row3 1 2 3  
row4    x y
row6 dd dd ee  
    EOF

    # The temporary path is not used by the body; the wrapper is kept as is.
    TmpFile.with_file do |_path|
      left = TSV.open(StringIO.new(left_rows), :sep => " ")
      right = TSV.open(StringIO.new(right_rows), :sep => " ")
      merged = left.merge_different_fields(right)

      # Render with sorted keys and turn tabs into spaces to match +expected+.
      rendered = merged.to_s(merged.keys.sort, true).gsub(/\t/,' ')
      assert_equal(expected, rendered)
    end
  end

  # Merges two comma-separated streams where the first one repeats the "row1"
  # key on two lines; the order of the joined values is not fixed, so two
  # expected outputs are accepted.
  def test_merge_different_rows_split_lines
    left_rows =<<-EOF
row6,dd,dd,ee
row1,a,b,c
row1,aa,bb,cc
row2,A,B,C
row3,1,2,3
    EOF
    right_rows =<<-EOF
row20,rr,rr
row1,d,e
row2,D,E
row4,x,y
    EOF

    # The two acceptable renderings differ only in the order of the row1 values.
    expected_long_first =<<-EOF
row1,aa|a,bb|b,cc|c,d,e
row2,A,B,C,D,E
row20,,,,rr,rr
row3,1,2,3,,
row4,,,,x,y
row6,dd,dd,ee,,
    EOF
    expected_short_first =<<-EOF
row1,a|aa,b|bb,c|cc,d,e
row2,A,B,C,D,E
row20,,,,rr,rr
row3,1,2,3,,
row4,,,,x,y
row6,dd,dd,ee,,
    EOF

    TmpFile.with_file do |merged_path|
      TSV.merge_different_fields(StringIO.new(left_rows), StringIO.new(right_rows), merged_path, ',')

      merged_text = Open.read(merged_path)
      assert([expected_long_first, expected_short_first].include?(merged_text))
    end
  end

  # Table-based variant of the split-lines merge: both inputs are opened with
  # :merge => true and combined through TSV#merge_different_fields.
  def test_merge_different_rows_split_lines_tsv
    left_rows =<<-EOF
row6,dd,dd,ee
row1,a,b,c
row1,aa,bb,cc
row2,A,B,C
row3,1,2,3
    EOF
    right_rows =<<-EOF
row20,rr,rr
row1,d,e
row2,D,E
row4,x,y
    EOF
    expected =<<-EOF
row1,aa|a,bb|b,cc|c,d,e
row2,A,B,C,D,E
row20,,,,rr,rr
row3,1,2,3,,
row4,,,,x,y
row6,dd,dd,ee,,
    EOF

    # The temporary path is not used by the body; the wrapper is kept as is.
    TmpFile.with_file do |_path|
      left = TSV.open(StringIO.new(left_rows), :sep => ',', :merge => true)
      right = TSV.open(StringIO.new(right_rows), :sep => ',', :merge => true)
      merged = left.merge_different_fields(right)

      # Put the values of every cell in a fixed order (longest first) so the
      # rendered text can be compared against a single expected string.
      merged.each do |_key, cells|
        cells.each { |cell| cell.replace(cell.sort_by(&:length).reverse) }
      end

      rendered = merged.to_s(:sort, true).gsub(/\t/,',')
      assert_equal(expected, rendered)
    end
  end

  # Feeds TSV.merge_row_fields a comma-separated file in which "row1" appears
  # on two lines and expects the written output to contain the two values of
  # its first field joined with "|".
  def test_merge_rows
    file1 =<<-EOF
row1,a,b,c
row1,aa,bb,cc
row2,A,B,C
row3,1,2,3
    EOF

    TmpFile.with_file(file1) do |input|
      TmpFile.with_file do |output|
        # NOTE(review): the data is comma-separated, so a separator is passed
        # explicitly. It is assumed to be the third argument, mirroring the
        # trailing separator accepted by TSV.merge_different_fields -- confirm
        # against the merge_row_fields signature.
        TSV.merge_row_fields Open.open(input), output, ','

        # The pipe has to be escaped: the former /a|aa/ was an alternation that
        # matched any output containing an "a", so the assertion could never
        # fail. Either order of the joined values is accepted.
        assert Open.read(output) =~ /a\|aa|aa\|a/
      end
    end
  end

  # Exercises TSV#attach with :one2one => true: "row3" carries two ValueB
  # entries (b and bb) and is expected to end up with exactly two OtherID
  # values, Id1 and Id4.
  def test_one2one
    base_content =<<-EOF
#Id    ValueA    ValueB
row1    a|aa|aaa    b
row2    A    B
row3    A    b|bb
    EOF

    other_content =<<-EOF
#ValueB    OtherID
b    Id1|Id2
B    Id3
bb   Id4
    EOF

    # Dumps +content+ into a temporary file and parses it as a :double TSV.
    parse_double = lambda do |content|
      parsed = nil
      TmpFile.with_file(content) do |path|
        parsed = TSV.open(File.open(path), :double, :sep => /\s+/)
      end
      parsed
    end

    base = parse_double.call(base_content)
    other = parse_double.call(other_content)

    base.attach(other, :one2one => true)

    assert_equal(%w(ValueA ValueB OtherID), base.fields)
    assert_equal(%w(Id1 Id4), base["row3"]["OtherID"])
  end

  # Attaches a :double table keyed by "ValueA" to a :flat table restricted to
  # the "ValueA" field, printing the table before and the result after.
  # NOTE(review): this test makes no assertions; it only fails if a call raises.
  def test_attach_flat
    flat_content =<<-EOF
#Id    ValueA    ValueB
row1    a|aa|aaa    b
row2    A    B
    EOF

    other_content =<<-EOF
#ValueA    OtherID
a    Id1|Id2
A    Id3
    EOF

    flat_tsv = other_tsv = nil

    TmpFile.with_file(flat_content) do |path|
      flat_tsv = TSV.open(File.open(path), :flat, :fields => ["ValueA"], :sep => /\s+/)
      puts flat_tsv
    end

    TmpFile.with_file(other_content) do |path|
      other_tsv = TSV.open(File.open(path), :double, :sep => /\s+/)
    end

    attached = flat_tsv.attach(other_tsv, :fields => ["OtherID"])
    puts attached
  end
end