require 'bindata'
require 'zlib'

# An example of a reader / writer for the GZIP file format as per rfc1952.
# See notes at the end of this file for implementation discussions.
class Gzip < BinData::Record
  # Binary representation of a ruby Time object.
  #
  # Stored as an unsigned 32 bit little endian integer; converted with
  # Time#to_i on assignment and Time.at on access.
  class Mtime < BinData::Primitive
    uint32le :time

    # Stores +val+ (anything responding to #to_i, e.g. a Time) as an integer.
    def set(val)
      self.time = val.to_i
    end

    # Returns the stored integer as a Time object.
    def get
      Time.at(time)
    end
  end

  # Known compression methods
  DEFLATE = 8

  endian :little

  uint16  :ident,              :asserted_value => 0x8b1f
  uint8   :compression_method, :initial_value => DEFLATE

  # Flag bits. The f* values are derived from the optional fields below, so
  # they never need to be set by hand.
  bit3    :freserved,          :asserted_value => 0
  bit1    :fcomment,           :value => lambda { comment.length > 0 ? 1 : 0 }
  bit1    :ffile_name,         :value => lambda { file_name.length > 0 ? 1 : 0 }
  bit1    :fextra,             :value => lambda { extra.len > 0 ? 1 : 0 }
  bit1    :fcrc16,             :value => 0 # see note at end of file
  bit1    :ftext

  mtime   :mtime
  uint8   :extra_flags
  uint8   :os,                 :initial_value => 255 # unknown OS

  # The following fields are optional depending on the bits in flags

  struct  :extra,              :onlyif => lambda { fextra.nonzero? } do
    # NOTE(review): :length is kept exactly as found; possibly :value was
    # intended so that len tracks data.length -- confirm against BinData docs.
    uint16 :len,               :length => lambda { data.length }
    string :data,              :read_length => :len
  end
  stringz :file_name,          :onlyif => lambda { ffile_name.nonzero? }
  stringz :comment,            :onlyif => lambda { fcomment.nonzero? }
  uint16  :crc16,              :onlyif => lambda { fcrc16.nonzero? }

  # The length of compressed data must be calculated from the current file offset
  count_bytes_remaining :bytes_remaining
  string  :compressed_data,    :read_length => lambda { bytes_remaining - footer.num_bytes }

  struct :footer do
    uint32 :crc32
    uint32 :uncompressed_size
  end

  # Compresses +data+ and fills in the compressed payload and the footer.
  #
  # data - the String to compress.
  #
  # Sets compressed_data, footer.crc32 and footer.uncompressed_size.
  def data=(data)
    # Zlib.deflate includes a header + footer which we must discard
    self.compressed_data = Zlib::Deflate.deflate(data)[2..-5]
    self.footer.crc32 = Zlib::crc32(data)
    # rfc1952 defines ISIZE as the input size in bytes modulo 2^32, so count
    # bytes (not characters) and keep only the low 32 bits.
    self.footer.uncompressed_size = data.bytesize & 0xffff_ffff
  end
end

if __FILE__ == $0
  # Write a gzip file.
  print "Creating a gzip file ... "
  g = Gzip.new
  g.data = "the cat sat on the mat"
  g.file_name = "poetry"
  g.mtime = Time.now
  g.comment = "A stunning piece of prose"
  # Binary mode: a gzip stream must not undergo newline or encoding translation.
  File.open("poetry.gz", "wb") do |io|
    g.write(io)
  end
  puts "done."
  puts

  # Read the created gzip file.
  print "Reading newly created gzip file ... "
  g = Gzip.new
  File.open("poetry.gz", "rb") do |io|
    g.read(io)
  end
  puts "done."
  puts

  puts "Printing gzip file details in the format of gzip -l -v"

  # compression ratio
  ratio = 100.0 * (g.footer.uncompressed_size - g.compressed_data.size) /
          g.footer.uncompressed_size

  comp_meth = (g.compression_method == Gzip::DEFLATE) ? "defla" : ""

  # Output using the same format as gzip -l -v
  puts "method crc date time compressed " +
       "uncompressed ratio uncompressed_name"
  puts "%5s %08x %6s %5s %19s %19s %5.1f%% %s" % [comp_meth,
                                                   g.footer.crc32,
                                                   g.mtime.strftime('%b %d'),
                                                   g.mtime.strftime('%H:%M'),
                                                   g.num_bytes,
                                                   g.footer.uncompressed_size,
                                                   ratio,
                                                   g.file_name]
  puts "Comment: #{g.comment}" if g.comment != ""
  puts

  puts "Executing gzip -l -v"
  puts `gzip -l -v poetry.gz`
end

# Notes:
#
# Mtime: A convenience wrapper that allows a ruby Time object to be used instead
#        of manually dealing with the raw form (seconds since 1 Jan 1970)
#
# rfc1952 specifies an optional crc16 field.  The gzip command line client
# uses this field for multi-part gzip.  Hence we ignore this.

# We are cheating and using the Zlib library for compression.  We can't use
# this library for decompression as zlib requires an adler32 checksum while
# gzip uses crc32.