Sha256: 9df6b7b01bf6bad23a1350461367b752d5a88b3bacb0c6207f334bf8c50de086

Contents?: true

Size: 1.72 KB

Versions: 2

Compression:

Stored size: 1.72 KB

Contents

module Bio::BGZF

  # Raised when a block is structurally invalid: a malformed extra
  # subfield, an unexpected uncompressed size, or a CRC32 mismatch.
  class FormatError < StandardError; end

  # Raised when the input does not look like BGZF at all: the magic bytes
  # are wrong or no block-size subfield is present. It is a FormatError,
  # so rescuing FormatError also catches it.
  class NotBGZFError < FormatError; end

  # Reads one raw (still compressed) BGZF block from the stream +f+.
  #
  # The fixed 12-byte gzip header is checked against the BGZF magic, then
  # the gzip extra field is scanned for the "BC" subfield holding the block
  # size. Only +f.read+ is used, so +f+ does not have to be seekable.
  #
  # @param f [#read] binary input positioned at the start of a block
  # @return [Array(String, Integer, Integer), nil] compressed payload,
  #   expected uncompressed size and expected CRC32, in that order;
  #   +nil+ when +f+ is already at end of input
  # @raise [NotBGZFError] if the magic is wrong or no "BC" subfield exists
  # @raise [FormatError] if the block is truncated or its fields are
  #   inconsistent
  def read_bgzf_block(f)
    # A local lambda rather than a helper method: it works the same whether
    # this method is called as a module function or through +include+.
    read_exactly = lambda do |count, what|
      chunk = f.read(count)
      if chunk.nil? || chunk.bytesize != count
        raise FormatError, "unexpected end of input while reading #{what}"
      end
      chunk
    end

    hstart = f.read(12)
    return nil if hstart.nil? # clean EOF between blocks
    if hstart.bytesize < 12
      raise FormatError, "truncated BGZF block header: got #{hstart.bytesize} of 12 bytes"
    end

    # First 4 bytes as little-endian: gzip ID1, ID2, CM = 8 (deflate),
    # FLG = 4 (FEXTRA). The next 6 bytes are skipped, then comes XLEN.
    magic, gzip_extra_length = hstart.unpack('Vxxxxxxv')
    raise NotBGZFError, "wrong BGZF magic: #{sprintf('%08x', magic)}" unless magic == 0x04088B1F

    len = 0
    bsize = nil
    while len < gzip_extra_length
      si1, si2, slen = read_exactly.call(4, 'extra subfield header').unpack('CCv')
      if si1 == 66 && si2 == 67 # subfield identifier "BC"
        raise FormatError, "BC subfield length is #{slen} but must be 2" if slen != 2
        raise FormatError, 'duplicate field with block size' unless bsize.nil?
        bsize = read_exactly.call(2, 'BC subfield').unpack('v')[0]
      else
        # Unknown subfield: consume and discard its payload.
        read_exactly.call(slen, 'extra subfield data')
      end
      len += 4 + slen
    end

    if len != gzip_extra_length
      raise FormatError, "total length of subfields is #{len} bytes but must be #{gzip_extra_length}"
    end
    raise NotBGZFError, 'block size was not found in any subfield' if bsize.nil?

    # BSIZE is the total block size minus one, so the payload length is
    # BSIZE minus XLEN minus (12-byte header + 8-byte trailer - 1).
    cdata_length = bsize - gzip_extra_length - 19
    if cdata_length < 0
      raise FormatError, "block size #{bsize} is too small for an extra field of #{gzip_extra_length} bytes"
    end

    compressed_data = read_exactly.call(cdata_length, 'compressed data')
    crc32, input_size = read_exactly.call(8, 'block trailer').unpack('VV')

    [compressed_data, input_size, crc32]
  end
  module_function :read_bgzf_block

  # Reads the next BGZF block from +f+, decodes it with +unpack+ and checks
  # the result against the size and CRC32 stored in the block trailer.
  #
  # @param f input stream, passed on to +read_bgzf_block+
  # @return [String, nil] the decoded block data, or +nil+ when
  #   +read_bgzf_block+ reports end of input
  # @raise [FormatError] if the decoded size or the CRC32 does not match
  def decompress_block(f)
    # Remember where the block starts so a size mismatch can be reported
    # with its offset. Streams without a usable position (e.g. pipes) are
    # tolerated; the offset is then reported as unknown.
    block_start = begin
      f.pos if f.respond_to?(:pos)
    rescue SystemCallError
      nil
    end

    cdata, in_size, expected_crc = read_bgzf_block(f)
    return nil if cdata.nil?

    data = unpack(cdata)
    if data.bytesize != in_size
      location = block_start.nil? ? 'unknown offset' : block_start
      raise FormatError, "Expected #{in_size} bytes from BGZF block at #{location}, but got #{data.bytesize} bytes!"
    end

    crc = Zlib.crc32(data, 0)
    if crc != expected_crc
      raise FormatError, "CRC error: expected #{expected_crc.to_s(16)}, got #{crc.to_s(16)}"
    end

    data
  end
  module_function :decompress_block

end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
bio-bgzf-0.2.1 lib/bio-bgzf/block.rb
bio-bgzf-0.2.0 lib/bio-bgzf/block.rb