Sha256: 73957f417459a7a7d5127ada2536cf3b372bf0406152d314f5e2fc932cd264b7

Contents?: true

Size: 1.52 KB

Versions: 1

Compression:

Stored size: 1.52 KB

Contents

module Bio::BGZF

  # Reads one raw (still compressed) BGZF block from +f+.
  #
  # The block is parsed as a gzip member with an extra field: a 12-byte fixed
  # header, XLEN bytes of extra subfields (one of which, "BC", must carry the
  # block size), the compressed payload and an 8-byte trailer.
  #
  # @param f [#read] IO-like object positioned at the start of a block. Only
  #   +read(length)+ is called, so non-seekable streams (pipes, sockets) work.
  # @return [nil] if +f+ is already at end of file
  # @return [Array(String, Integer, Integer)] the compressed data, the
  #   uncompressed size and the CRC32 value stored in the block trailer
  # @raise [RuntimeError] if the magic number is wrong, the extra subfields
  #   are malformed, or the stream ends in the middle of the block
  def read_bgzf_block(f)
    # Read exactly +count+ bytes or fail with a descriptive error instead of
    # letting a nil/short read surface later as a NoMethodError.
    read_exactly = lambda do |count, what|
      chunk = f.read(count)
      if chunk.nil? || chunk.bytesize != count
        raise "unexpected end of file while reading #{what}"
      end
      chunk
    end

    hstart = f.read(12)
    return nil if hstart.nil? # clean EOF: no more blocks
    raise 'unexpected end of file while reading BGZF block header' if hstart.bytesize < 12

    # 'V' takes the first four header bytes as one little-endian word, the six
    # skipped bytes are not needed here, and 'v' is the extra-field length.
    magic, gzip_extra_length = hstart.unpack('Vxxxxxxv')
    raise 'wrong BGZF magic' unless magic == 0x04088B1F

    len = 0
    bsize = nil
    while len < gzip_extra_length
      si1, si2, slen = read_exactly.call(4, 'extra subfield header').unpack('CCv')
      if si1 == 66 && si2 == 67 # "BC": the subfield that carries the block size
        raise "BC subfield length is #{slen} but must be 2" if slen != 2
        raise 'duplicate field with block size' unless bsize.nil?
        bsize = read_exactly.call(2, 'block size').unpack('v')[0]
      else
        # Skip unknown subfields by reading, not seeking, so that
        # non-seekable streams are supported.
        read_exactly.call(slen, 'extra subfield data')
      end
      len += 4 + slen
    end

    if len != gzip_extra_length
      raise "total length of subfields is #{len} bytes but must be #{gzip_extra_length}"
    end
    raise 'block size was not found in any subfield' if bsize.nil?

    # The stored block size is the total size minus one; subtracting the
    # 12-byte header, the extra field and the 8-byte trailer leaves the payload.
    data_length = bsize - gzip_extra_length - 19
    raise "invalid block size #{bsize}" if data_length < 0

    compressed_data = read_exactly.call(data_length, 'compressed data')
    crc32, input_size = read_exactly.call(8, 'block trailer').unpack('VV')

    [compressed_data, input_size, crc32]
  end
  # Make read_bgzf_block callable directly on the module
  # (Bio::BGZF.read_bgzf_block) as well as via include.
  module_function :read_bgzf_block

  # Reads the next BGZF block from +f+ and returns its uncompressed contents.
  #
  # The block is read with read_bgzf_block and inflated with unpack (both
  # defined elsewhere in this module). The result is then checked against the
  # size and CRC32 stored in the block trailer.
  #
  # @param f IO-like object passed through to read_bgzf_block
  # @return [String, nil] the uncompressed data, or nil at end of file
  # @raise [RuntimeError] if the uncompressed size or CRC32 does not match the
  #   values recorded in the block
  def decompress_block(f)
    # Remember where the block starts for the error message. Not every stream
    # can report a position (e.g. pipes raise Errno::ESPIPE).
    block_pos = begin
      f.respond_to?(:pos) ? f.pos : 'unknown offset'
    rescue SystemCallError
      'unknown offset'
    end

    cdata, in_size, expected_crc = read_bgzf_block(f)
    return nil if cdata.nil?

    data = unpack(cdata)
    if data.bytesize != in_size
      raise "Expected #{in_size} bytes from BGZF block at #{block_pos}, but got #{data.bytesize} bytes!"
    end

    # The gzip trailer CRC32 covers the *uncompressed* data, so it can only be
    # verified after inflating.
    crc = Zlib.crc32(data, 0)
    if crc != expected_crc
      raise "CRC error: expected #{expected_crc.to_s(16)}, got #{crc.to_s(16)}"
    end

    data
  end
  # Make decompress_block callable directly on the module
  # (Bio::BGZF.decompress_block) as well as via include.
  module_function :decompress_block

end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
bio-bgzf-0.1.0 lib/bio-bgzf/block.rb