require 'spec_helper'

module Bio
  module MAF

    # Placeholder group for ParseContext. The single example below has no
    # body, so RSpec reports it as pending instead of executing anything.
    describe ParseContext do
      it "tracks the last block position"
    end

    describe ChunkReader do
      # Open the mm8_chr7_tiny.maf fixture before every example and keep the
      # handle in @f, where the examples and the after hook can reach it.
      before(:each) do
        @f = (TestData + 'mm8_chr7_tiny.maf').open
      end
      # Chunk size validation in the constructor: zero, negative and
      # non-power-of-two sizes are expected to fail with an "Invalid chunk
      # size" error, while 4K and 8M are expected to be accepted.
      describe "#initialize" do
        it "rejects a chunk size of zero" do
          expect {
            ChunkReader.new(@f, 0)
          }.to raise_error(/Invalid chunk size/)
        end
        it "rejects a negative chunk size" do
          # Use a genuinely negative value here (this example used to pass 0,
          # which only repeated the zero case). The magnitude is a power of
          # two so that the sign is the only thing wrong with the argument.
          expect {
            ChunkReader.new(@f, -1024)
          }.to raise_error(/Invalid chunk size/)
        end
        it "rejects a chunk size not a power of 2" do
          expect {
            ChunkReader.new(@f, 1000)
          }.to raise_error(/Invalid chunk size/)
        end
        it "accepts a 4k chunk size" do
          expect {
            ChunkReader.new(@f, 4096)
          }.not_to raise_error
        end
        it "accepts an 8M chunk size" do
          expect {
            ChunkReader.new(@f, 8 * 1024 * 1024)
          }.not_to raise_error
        end
      end
      # Examples sharing a ChunkReader built with a 1024-byte chunk size on
      # top of the fixture handle @f opened by the enclosing group.
      context "with 1K ChunkReader" do
        before(:each) do
          @r = ChunkReader.new(@f, 1024)
        end

        describe "#chunk_size=" do
          it "sets the chunk size" do
            @r.chunk_size = 8192
            @r.chunk_size.should == 8192
          end
          # it "sets the chunk shift" do
          #   @r.chunk_size = 8192
          #   @r.chunk_shift.should == 13
          # end
        end

        describe "#read_chunk" do
          it "returns a chunk of the specified length" do
            # The comparison must go through `should`: a bare `==` is
            # evaluated and discarded, so the example could never fail.
            @r.read_chunk.bytesize.should == 1024
          end
          it "starts at position 0" do
            @r.pos.should == 0
          end
          it "advances the position" do
            @r.read_chunk
            @r.pos.should == 1024
          end
        end

        describe "#read_chunk_at" do
          it "returns data starting at the specified offset" do
            c = @r.read_chunk_at(59)
            c.start_with?("80082334").should be_true
          end
          it "handles a read starting exactly at a chunk boundary" do
            c = @r.read_chunk_at(1024)
            c.start_with?("   594").should be_true
          end
        end
      end
      # Close the fixture handle stored in @f once each example has finished.
      after(:each) do
        @f.close
      end
    end

    shared_examples "parsers" do
      
      # Construction of described_class: two body-less (pending) examples
      # plus the failure cases for a missing file and a non-MAF file.
      describe "creation" do
        it "opens a file specified as a String argument"
        it "takes an IO object as an open file"

        it "raises an error when the file does not exist" do
          missing_path = "/doesnotexist"
          expect { described_class.new(missing_path) }.to raise_error(Errno::ENOENT)
        end

        it "raises an error when the file is not in MAF format" do
          # Any exception counts here; no specific error class is required.
          expect { described_class.new(TestData + '../../Rakefile') }.to raise_error
        end
      end

      # A parser built on t1.maf must expose a non-nil header.
      describe "#header" do
        it "parses the MAF header" do
          parser = described_class.new(TestData + 't1.maf')
          parsed_header = parser.header
          parsed_header.should_not be_nil
        end
      end

      # Fetching blocks by [offset, length] pairs. The same set of shared
      # examples is run against parsers created with different chunk sizes,
      # and against a parser that has already been iterated to the end.
      describe "#fetch_blocks" do
        # Builds a parser on the mm8_chr7_tiny.maf fixture, passing the same
        # value for both :chunk_size and :random_chunk_size.
        def tiny_parser(size)
          described_class.new(TestData + 'mm8_chr7_tiny.maf',
                              :chunk_size => size,
                              :random_chunk_size => size)
        end

        # Every example closes the underlying file of the parser in @p.
        after(:each) { @p.f.close }

        shared_examples_for "any chunk size" do
          it "parses a single block" do
            fetched = @p.fetch_blocks([[16, 1087]]).to_a
            fetched.size.should == 1
            fetched[0].offset.should == 16
          end

          it "parses several consecutive blocks" do
            wanted = [[16, 1087], [1103, 1908], [3011, 2027]]
            fetched = @p.fetch_blocks(wanted).to_a
            fetched.size.should == 3
            fetched.map(&:offset).should == [16, 1103, 3011]
          end

          it "parses consecutive blocks further ahead" do
            wanted = [[5038, 1647], [6685, 829]]
            fetched = @p.fetch_blocks(wanted).to_a
            fetched.size.should == 2
            fetched.map(&:offset).should == [5038, 6685]
          end

          it "parses nonconsecutive blocks" do
            wanted = [[16, 1087], [3011, 2027]]
            fetched = @p.fetch_blocks(wanted).to_a
            fetched.size.should == 2
            fetched.map(&:offset).should == [16, 3011]
          end

          it "does not return empty blocks" do
            # Neither species name is expected to match anything, so the
            # requested block must be dropped rather than returned empty.
            @p.sequence_filter = { :only_species => %w(jabberwocky unicorn) }
            fetched = @p.fetch_blocks([[16, 1087]]).to_a
            fetched.size.should == 0
          end

          it "takes a block argument" do
            wanted = [[16, 1087], [1103, 1908], [3011, 2027]]
            yielded = 0
            @p.fetch_blocks(wanted) { |_block| yielded += 1 }
            yielded.should == 3
          end
        end

        context "with 4K chunk size" do
          before(:each) { @p = tiny_parser(4096) }
          it_behaves_like "any chunk size"
        end

        context "with 1K chunk size" do
          before(:each) { @p = tiny_parser(1024) }
          it_behaves_like "any chunk size"
        end

        context "after parsing to end" do
          before(:each) do
            @p = tiny_parser(4096)
            # Walk through every block first so fetching starts from a
            # parser that has already been iterated to completion.
            @p.each_block { |_b| nil }
          end
          it_behaves_like "any chunk size"
        end

        context "with 8M chunk size" do
          before(:each) { @p = tiny_parser(8 * 1024 * 1024) }
          it_behaves_like "any chunk size"
        end

        it "handles a skipped block in a BGZF file" do
          @p = described_class.new(TestData + 'mm8.chrM.maf.bgz')
          wanted = [[5141084112, 2100],
                    [5141087379, 2006]]
          fetched = @p.fetch_blocks(wanted)
          fetched.map(&:offset).should == [5141084112, 5141087379]
        end
      end

      # Iteration over all blocks: as an Enumerator, from a non-seekable
      # pipe, and over an empty MAF file.
      describe "#each_block" do
        it "returns an Enumerator when called without a block" do
          parser = described_class.new(TestData + 'mm8_chr7_tiny.maf')
          parser.each_block.count.should == 8
        end
        it "works when parsing from a pipe" do
          # Use the argument-vector form of popen so no shell is involved;
          # an interpolated command string breaks as soon as the fixture
          # path contains spaces or shell metacharacters.
          fixture = (TestData + 'mm8_chr7_tiny.maf').to_s
          IO.popen(['cat', fixture]) do |pipe|
            parser = described_class.new(pipe)
            parser.each_block.count.should == 8
          end
        end
        it "is not called with an empty MAF file" do
          called = false
          parser = described_class.new(TestData + 'empty.maf')
          parser.each_block { called = true }
          called.should be_false
        end
      end

      # Behaviour of the :only_species sequence filter, checked against the
      # mm8_mod_a.maf fixture.
      describe "sequence_filter" do
        before(:each) do
          @p = described_class.new(TestData + 'mm8_mod_a.maf')
        end

        # Installs an :only_species filter with the given species names on
        # the parser under test.
        def restrict_species(names)
          @p.sequence_filter = { :only_species => names }
        end

        it "restricts sequences parsed" do
          restrict_species %w(mm8 rn4)
          @p.parse_block.sequences.size.should == 2
        end

        it "matches at the species delimiter rather than a prefix" do
          restrict_species %w(mm8 hg18)
          @p.parse_block.sequences.size.should == 2
        end

        it "sets filtered? when modified" do
          restrict_species %w(mm8 rn4)
          first_block = @p.parse_block
          first_block.filtered?.should be_true
        end

        it "does not set filtered? when unmodified" do
          restrict_species %w(mm8 rn4 oryCun1 hg18 hg181)
          first_block = @p.parse_block
          first_block.filtered?.should be_false
        end

        it "does not return empty blocks" do
          restrict_species %w(jabberwocky unicorn)
          @p.parse_blocks.count.should == 0
        end
      end

      # Not implemented yet: the example has no body, so RSpec reports it as
      # pending. The description states the intended contract (nil at EOF).
      context "at end of file" do
        describe "#parse_block" do
          it "returns nil"
        end
      end

      # Parsing a single block from t1.maf; the malformed-data example has
      # no body and is therefore reported as pending.
      describe "#parse_block" do
        it "returns an alignment block" do
          parser = described_class.new(TestData + 't1.maf')
          first_block = parser.parse_block
          first_block.should_not be_nil
        end
        it "raises an exception for malformed data"
      end

      # Standalone examples covering sequence counts, whole-file parsing,
      # and block offset / size bookkeeping.
      it "gives the correct number of sequences" do
        parser = described_class.new(TestData + 'mm8_chr7_tiny.maf')
        first_block = parser.parse_block
        first_block.sequences.size.should == 10
      end

      it "handles absent alignment parameters" do
        parser = described_class.new(TestData + 'chrY-1block.maf')
        first_block = parser.parse_block
        first_block.should_not be_nil
      end

      it "parses larger files" do
        parser = described_class.new(TestData + 'mm8_chr7_tiny.maf')
        expect { parser.each_block { |blk| blk } }.not_to raise_error
      end

      it "handles trailing comments" do
        parser = described_class.new(TestData + 't1a.maf')
        expect { parser.each_block { |blk| blk } }.not_to raise_error
      end

      it "raises an exception on inconsistent sequence length" do
        pending
        # Not written yet: comparing plain string lengths is not enough,
        # because hyphens have to be skipped over.
      end

      it "tracks block start offsets correctly" do
        parser = described_class.new(TestData + 'mm8_chr7_tiny.maf')
        offsets = []
        parser.each_block { |blk| offsets << blk.offset }
        offsets.should == [16, 1103, 3011, 5038, 6685, 7514, 9022, 10113]
      end

      it "reports block sizes correctly" do
        parser = described_class.new(TestData + 'mm8_chr7_tiny.maf')
        first_block = parser.parse_block
        first_block.size.should == 1087
      end

      it "parses very large blocks" do
        parser = described_class.new(TestData + 'big-block.maf')
        block_count = 0
        parser.each_block { |_blk| block_count += 1 }
        block_count.should == 490
      end

    end

    describe Parser do
      include_examples "parsers"

      # Temporarily binds the constant +sym+ on +mod+ to +value+ while the
      # given block runs, then puts things back, even if the block raises.
      #
      # An existing own constant is removed before it is re-assigned, so Ruby
      # does not print "already initialized constant" warnings. If the
      # constant was only visible through an ancestor, the temporary override
      # is simply removed afterwards instead of leaving a copy on +mod+.
      #
      # @param mod [Module] module on which the constant is looked up and set
      # @param sym [Symbol, String] name of the constant
      # @param value [Object] value the constant has while the block runs
      # @return [Object] whatever the block returns
      # @raise [NameError] if the constant cannot be resolved from +mod+
      def with_const_value(mod, sym, value)
        old = mod.const_get(sym)
        # false => look only at mod itself, not at its ancestors.
        own = mod.const_defined?(sym, false)
        mod.send(:remove_const, sym) if own
        mod.const_set(sym, value)
        begin
          yield
        ensure
          # The block may have removed the constant itself; only undo what
          # is still there.
          mod.send(:remove_const, sym) if mod.const_defined?(sym, false)
          mod.const_set(sym, old) if own
        end
      end

      # merge_fetch_list turns [offset, length] pairs into
      # [offset, length, [block offsets]] triples; the examples below expect
      # contiguous entries to be merged into a single triple.
      describe "#merge_fetch_list" do
        before(:each) { @p = described_class.new(TestData + 'mm8_chr7_tiny.maf') }
        after(:each) { @p.f.close }

        it "passes through single records" do
          merged = @p.merge_fetch_list([[16, 1087]])
          merged.should == [[16, 1087, [16]]]
        end

        it "passes through non-contiguous records" do
          merged = @p.merge_fetch_list([[16, 1087], [3011, 2027]])
          merged.should == [[16, 1087, [16]],
                            [3011, 2027, [3011]]]
        end

        it "merges contiguous records" do
          merged = @p.merge_fetch_list([[16, 1087], [1103, 1908], [3011, 2027]])
          merged.should == [[16, 5022, [16, 1103, 3011]]]
        end
      end

      # Checked right after construction, before any block has been parsed.
      it "sets last block position correctly" do
        parser = Parser.new(TestData + 'mm8_subset_a.maf')
        position = parser.last_block_pos
        position.should == 1103
      end

      # Parsing with a 2048-byte chunk size, so that blocks and sequence
      # lines of the fixture straddle chunk boundaries.
      context "with 2k chunk size" do
        before(:each) do
          @p = Parser.new(TestData + 'mm8_chr7_tiny.maf', :chunk_size => 2048)
        end

        it "yields the correct number of blocks over chunk boundaries" do
          expected_scores = ['10542.0', '-33148.0', '87527.0', '185399.0',
                             '30120.0', '58255.0', '2607.0', '8132.0']
          seen_scores = []
          @p.each_block { |blk| seen_scores << blk.vars[:score] }
          seen_scores.should == expected_scores
        end

        it "sets last_block_pos correctly" do
          @p.last_block_pos.should == 1103
        end

        it "handles sequence lines over chunk boundaries" do
          # Discard the first block; the second one is the block of interest.
          @p.parse_block
          second_block = @p.parse_block
          split_seq = second_block.raw_seq(4)
          split_seq.text.size.should == 156
        end

        it "tracks block start offsets correctly over chunk boundaries" do
          offsets = []
          @p.each_block { |blk| offsets << blk.offset }
          offsets.should == [16, 1103, 3011, 5038, 6685, 7514, 9022, 10113]
        end
      end

      # With :join_blocks enabled and a species filter in place, an index
      # query over the interval is expected to return one block.
      describe ":join_blocks" do
        it "returns joined blocks" do
          species = %w(mm8 rn4 oryCun1 hg18 panTro2 rheMac2 canFam2 loxAfr1 echTel1)
          parser = Parser.new(TestData + 'mm8_chr7_tiny.maf', :join_blocks => true)
          parser.sequence_filter = { :only_species => species }
          index = KyotoIndex.open(TestData + "mm8_chr7_tiny.kct")
          interval = GenomicInterval.zero_based('mm8.chr7', 80082334, 80082471)
          found = index.find([interval], parser).to_a
          found.size.should == 1
          found.first.text_size.should == 210
        end
      end

      # With :remove_gaps enabled and only mm8 and rn4 kept, the first block
      # is expected to have a text size of 34 through both parsing entry
      # points.
      describe ":remove_gaps" do
        # Parser on the tiny fixture with :remove_gaps on and the species
        # filter already applied.
        def gap_removing_parser
          parser = Parser.new(TestData + 'mm8_chr7_tiny.maf', :remove_gaps => true)
          parser.sequence_filter = { :only_species => %w(mm8 rn4) }
          parser
        end

        it "removes gaps from #parse_blocks" do
          first_block = gap_removing_parser.parse_blocks.first
          first_block.text_size.should == 34
        end

        it "removes gaps from #parse_block" do
          first_block = gap_removing_parser.parse_block
          first_block.text_size.should == 34
        end
      end

      # With :as_bio_alignment enabled, results of an index query are
      # inspected through indexing and an #id accessor.
      describe ":as_bio_alignment" do
        it "returns bio-alignment objects" do
          parser = Parser.new(TestData + 'mm8_chr7_tiny.maf', :as_bio_alignment => true)
          index = KyotoIndex.open(TestData + "mm8_chr7_tiny.kct")
          interval = GenomicInterval.zero_based('mm8.chr7', 80082334, 80082471)
          alignments = index.find([interval], parser).to_a
          alignments.size.should == 2
          first_row = alignments.first[0]
          first_row[0].should == 'G'
          alignments.first[0].id.should == 'mm8.chr7'
        end
      end

    end

  end
  
end