# A low-level ZIP file data writer. You can use it to write out various headers and central directory elements
# separately. The class handles the actual encoding of the data according to the ZIP format APPNOTE document.
#
# The primary reason the writer is a separate object is because it is kept stateless. That is, all the data that
# is needed for writing a piece of the ZIP (say, the EOCD record, or a data descriptor) can be written
# without depending on data available elsewhere. This makes the writer very easy to test, since each of
# its methods outputs something that only depends on the method's arguments. For example, we use this
# to test writing Zip64 files which, when tested in a streaming fashion, would need tricky IO stubs
# to wind IO objects back and forth by large offsets. Instead, we can just write out the EOCD record
# with given offsets as arguments.
#
# Since some methods need a lot of data about the entity being written, everything is passed via
# keyword arguments - this way it is much less likely that you can make a mistake writing something.
#
# Another reason for having a separate Writer is that most ZIP libraries attach the methods for
# writing out the file headers to some sort of Entry object, which represents a file within the ZIP.
# However, when you are diagnosing issues with the ZIP files you produce, you actually want to have
# the absolute _most_ of the code responsible for writing the actual encoded bytes available to you on
# one screen. Altering or checking that code then becomes much, much easier. The methods doing the
# writing are also intentionally left very verbose - so that you can follow what is happening at
# all times.
#
# All methods of the writer accept anything that responds to `<<` as `io` argument - you can use
# that to output to String objects, or to output to Arrays that you can later join together.
class ZipTricks::ZipWriter
  FOUR_BYTE_MAX_UINT = 0xFFFFFFFF
  TWO_BYTE_MAX_UINT = 0xFFFF
  ZIP_TRICKS_COMMENT = 'Written using ZipTricks %s' % ZipTricks::VERSION
  VERSION_MADE_BY = 52
  VERSION_NEEDED_TO_EXTRACT = 20
  VERSION_NEEDED_TO_EXTRACT_ZIP64 = 45
  DEFAULT_EXTERNAL_ATTRS = begin
    # These need to be set so that the unarchived files do not become executable on UNIX, for
    # security purposes. Strictly speaking we would want to make this user-customizable,
    # but for now just putting in sane defaults will do. For example, Trac with zipinfo does this:
    # zipinfo.external_attr = 0644 << 16L # permissions -r-wr--r--.
    # We snatch the incantations from Rubyzip for this.
    unix_perms = 0644
    file_type_file = 010
    (file_type_file << 12 | (unix_perms & 07777)) << 16
  end
  MADE_BY_SIGNATURE = begin
    # A combination of the VERSION_MADE_BY low byte and the OS type high byte
    os_type = 3 # UNIX
    [VERSION_MADE_BY, os_type].pack('CC')
  end

  C_V = 'V'.freeze   # Encode a 4-byte unsigned little-endian uint
  C_v = 'v'.freeze   # Encode a 2-byte unsigned little-endian uint
  C_Qe = 'Q<'.freeze # Encode an 8-byte unsigned little-endian uint
  C_C = 'C'.freeze   # Encode a single unsigned byte (used for bit flags)
  C_N = 'N'.freeze   # 4-byte unsigned BIG-endian uint. No longer used by the writer itself,
                     # the constant is only retained because it was never private.
  C_le = 'l<'.freeze # Encode a 4-byte signed little-endian int

  private_constant :FOUR_BYTE_MAX_UINT, :TWO_BYTE_MAX_UINT,
    :VERSION_MADE_BY, :VERSION_NEEDED_TO_EXTRACT, :VERSION_NEEDED_TO_EXTRACT_ZIP64,
    :DEFAULT_EXTERNAL_ATTRS, :MADE_BY_SIGNATURE,
    :C_V, :C_v, :C_Qe, :C_le, :ZIP_TRICKS_COMMENT

  # Writes the local file header, that precedes the actual file _data_.
  #
  # @param io[#<<] the buffer to write the local file header to
  # @param filename[String] the name of the file in the archive
  # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
  # @param uncompressed_size[Fixnum] The size of the file once extracted
  # @param crc32[Fixnum] The CRC32 checksum of the file
  # @param mtime[Time] the modification time to be recorded in the ZIP
  # @param gp_flags[Fixnum] bit-packed general purpose flags
  # @param storage_mode[Fixnum] 8 for deflated, 0 for stored...
  # @return [void]
  def write_local_file_header(io:, filename:, compressed_size:, uncompressed_size:, crc32:, gp_flags:, mtime:, storage_mode:)
    requires_zip64 = (compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT)

    io << [0x04034b50].pack(C_V)                        # local file header signature     4 bytes  (0x04034b50)
    if requires_zip64                                   # version needed to extract       2 bytes
      io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_v)
    else
      io << [VERSION_NEEDED_TO_EXTRACT].pack(C_v)
    end

    io << [gp_flags].pack(C_v)                          # general purpose bit flag        2 bytes
    io << [storage_mode].pack(C_v)                      # compression method              2 bytes
    io << [to_binary_dos_time(mtime)].pack(C_v)         # last mod file time              2 bytes
    io << [to_binary_dos_date(mtime)].pack(C_v)         # last mod file date              2 bytes
    io << [crc32].pack(C_V)                             # crc-32                          4 bytes

    if requires_zip64
      # The real sizes go into the Zip64 extra field, the fixed fields carry the overflow marker
      io << [FOUR_BYTE_MAX_UINT].pack(C_V)              # compressed size                 4 bytes
      io << [FOUR_BYTE_MAX_UINT].pack(C_V)              # uncompressed size               4 bytes
    else
      io << [compressed_size].pack(C_V)                 # compressed size                 4 bytes
      io << [uncompressed_size].pack(C_V)               # uncompressed size               4 bytes
    end

    # Filename should not be longer than 0xFFFF otherwise this wont fit here
    io << [filename.bytesize].pack(C_v)                 # file name length                2 bytes

    # Interesting tidbit:
    # https://social.technet.microsoft.com/Forums/windows/en-US/6a60399f-2879-4859-b7ab-6ddd08a70948
    # TL;DR of it is: Windows 7 Explorer _will_ open Zip64 entries. However, it desires to have the
    # Zip64 extra field as _the first_ extra field.
    #
    # Start from a fresh mutable buffer (and not a string literal) so that appending keeps working
    # when the file is loaded with frozen string literals enabled.
    extra_fields = String.new
    if requires_zip64
      extra_fields << zip_64_extra_for_local_file_header(compressed_size: compressed_size, uncompressed_size: uncompressed_size)
    end
    extra_fields << timestamp_extra(mtime)

    io << [extra_fields.bytesize].pack(C_v)             # extra field length              2 bytes

    io << filename                                      # file name (variable size)
    io << extra_fields                                  # extra field (variable size)
  end

  # Writes the file header for the central directory, for a particular file in the archive. When writing out this data,
  # ensure that the CRC32 and both sizes (compressed/uncompressed) are correct for the entry in question.
  #
  # @param io[#<<] the buffer to write the central directory file header to
  # @param local_file_header_location[Fixnum] byte offset of the start of the entry's local file header in the ZIP
  # @param gp_flags[Fixnum] bit-packed general purpose flags
  # @param storage_mode[Fixnum] 8 for deflated, 0 for stored...
  # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
  # @param uncompressed_size[Fixnum] The size of the file once extracted
  # @param mtime[Time] the modification time to be recorded in the ZIP
  # @param crc32[Fixnum] The CRC32 checksum of the file
  # @param filename[String] the name of the file in the archive
  # @param external_attrs[Fixnum] bit-packed external attributes (defaults to UNIX file with 0644 permissions set)
  # @return [void]
  def write_central_directory_file_header(io:, local_file_header_location:, gp_flags:, storage_mode:, compressed_size:, uncompressed_size:, mtime:, crc32:,
    filename:, external_attrs: DEFAULT_EXTERNAL_ATTRS)
    # At this point if the header begins somewhere beyond 0xFFFFFFFF we _have_ to record the offset
    # of the local file header as a zip64 extra field, so we give up, give in, you lose, love will always win...
    add_zip64 = (local_file_header_location > FOUR_BYTE_MAX_UINT) ||
      (compressed_size > FOUR_BYTE_MAX_UINT) || (uncompressed_size > FOUR_BYTE_MAX_UINT)

    io << [0x02014b50].pack(C_V)                        # central file header signature   4 bytes  (0x02014b50)
    io << MADE_BY_SIGNATURE                             # version made by                 2 bytes
    if add_zip64
      io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_v) # version needed to extract       2 bytes
    else
      io << [VERSION_NEEDED_TO_EXTRACT].pack(C_v)       # version needed to extract       2 bytes
    end

    io << [gp_flags].pack(C_v)                          # general purpose bit flag        2 bytes
    io << [storage_mode].pack(C_v)                      # compression method              2 bytes
    io << [to_binary_dos_time(mtime)].pack(C_v)         # last mod file time              2 bytes
    io << [to_binary_dos_date(mtime)].pack(C_v)         # last mod file date              2 bytes
    io << [crc32].pack(C_V)                             # crc-32                          4 bytes

    if add_zip64
      io << [FOUR_BYTE_MAX_UINT].pack(C_V)              # compressed size                 4 bytes
      io << [FOUR_BYTE_MAX_UINT].pack(C_V)              # uncompressed size               4 bytes
    else
      io << [compressed_size].pack(C_V)                 # compressed size                 4 bytes
      io << [uncompressed_size].pack(C_V)               # uncompressed size               4 bytes
    end

    # Filename should not be longer than 0xFFFF otherwise this wont fit here
    io << [filename.bytesize].pack(C_v)                 # file name length                2 bytes

    # The Zip64 extra (when needed) goes first, followed by the extended timestamp.
    # A fresh mutable buffer is used so that appending works with frozen string literals too.
    extra_fields = String.new
    if add_zip64
      extra_fields << zip_64_extra_for_central_directory_file_header(local_file_header_location: local_file_header_location,
        compressed_size: compressed_size, uncompressed_size: uncompressed_size)
    end
    extra_fields << timestamp_extra(mtime)

    io << [extra_fields.bytesize].pack(C_v)             # extra field length              2 bytes

    io << [0].pack(C_v)                                 # file comment length             2 bytes

    # For The Unarchiver < 3.11.1 this field has to be set to the overflow value if zip64 is used
    # because otherwise it does not properly advance the pointer when reading the Zip64 extra field
    # https://bitbucket.org/WAHa_06x36/theunarchiver/pull-requests/2/bug-fix-for-zip64-extra-field-parser/diff
    if add_zip64                                        # disk number start               2 bytes
      io << [TWO_BYTE_MAX_UINT].pack(C_v)
    else
      io << [0].pack(C_v)
    end
    io << [0].pack(C_v)                                 # internal file attributes        2 bytes
    # Honor the caller-supplied attributes (which default to DEFAULT_EXTERNAL_ATTRS)
    io << [external_attrs].pack(C_V)                    # external file attributes        4 bytes

    if add_zip64                                        # relative offset of local header 4 bytes
      io << [FOUR_BYTE_MAX_UINT].pack(C_V)
    else
      io << [local_file_header_location].pack(C_V)
    end

    io << filename                                      # file name (variable size)
    io << extra_fields                                  # extra field (variable size)
    # (empty)                                           # file comment (variable size)
  end

  # Writes the data descriptor following the file data for a file whose local file header
  # was written with general-purpose flag bit 3 set. If one of the sizes exceeds the Zip64 threshold,
  # the data descriptor will have the sizes written out as 8-byte values instead of 4-byte values.
  #
  # @param io[#<<] the buffer to write the data descriptor to
  # @param crc32[Fixnum] The CRC32 checksum of the file
  # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
  # @param uncompressed_size[Fixnum] The size of the file once extracted
  # @return [void]
  def write_data_descriptor(io:, compressed_size:, uncompressed_size:, crc32:)
    io << [0x08074b50].pack(C_V)  # Although not originally assigned a signature, the value
                                  # 0x08074b50 has commonly been adopted as a signature value
                                  # for the data descriptor record.
    io << [crc32].pack(C_V)       # crc-32                          4 bytes

    # If one of the sizes is above 0xFFFFFFFF use ZIP64 lengths (8 bytes) instead. A good unarchiver
    # will decide to unpack it as such if it finds the Zip64 extra for the file in the central directory.
    # So also use the opportune moment to switch the entry to Zip64 if needed
    requires_zip64 = (compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT)
    pack_spec = requires_zip64 ? C_Qe : C_V

    io << [compressed_size].pack(pack_spec)    # compressed size    4 bytes, or 8 bytes for ZIP64
    io << [uncompressed_size].pack(pack_spec)  # uncompressed size  4 bytes, or 8 bytes for ZIP64
  end

  # Writes the "end of central directory record" (including the Zip64 salient bits if necessary)
  #
  # @param io[#<<] the buffer to write the central directory to.
  # @param start_of_central_directory_location[Fixnum] byte offset of the start of central directory from the beginning of ZIP file
  # @param central_directory_size[Fixnum] the size of the central directory (only file headers) in bytes
  # @param num_files_in_archive[Fixnum] How many files the archive contains
  # @param comment[String] the comment for the archive (defaults to ZIP_TRICKS_COMMENT)
  # @return [void]
  def write_end_of_central_directory(io:, start_of_central_directory_location:, central_directory_size:, num_files_in_archive:, comment: ZIP_TRICKS_COMMENT)
    # The Zip64 EOCD record (if written) sits immediately after the central directory
    zip64_eocdr_offset = start_of_central_directory_location + central_directory_size

    zip64_required = central_directory_size > FOUR_BYTE_MAX_UINT ||
      start_of_central_directory_location > FOUR_BYTE_MAX_UINT ||
      zip64_eocdr_offset > FOUR_BYTE_MAX_UINT ||
      num_files_in_archive > TWO_BYTE_MAX_UINT

    # Then, if zip64 is used
    if zip64_required
      # [zip64 end of central directory record]
                                                        # zip64 end of central dir
      io << [0x06064b50].pack(C_V)                      # signature                       4 bytes  (0x06064b50)
      io << [44].pack(C_Qe)                             # size of zip64 end of central
                                                        # directory record                8 bytes
                                                        # (this is ex. the 12 bytes of the signature and the size value itself).
                                                        # Without the extensible data sector (which we are not using)
                                                        # it is always 44 bytes.
      io << MADE_BY_SIGNATURE                           # version made by                 2 bytes
      io << [VERSION_NEEDED_TO_EXTRACT_ZIP64].pack(C_v) # version needed to extract       2 bytes
      io << [0].pack(C_V)                               # number of this disk             4 bytes
      io << [0].pack(C_V)                               # number of the disk with the
                                                        # start of the central directory  4 bytes
      io << [num_files_in_archive].pack(C_Qe)           # total number of entries in the
                                                        # central directory on this disk  8 bytes
      io << [num_files_in_archive].pack(C_Qe)           # total number of entries in the
                                                        # central directory               8 bytes
      io << [central_directory_size].pack(C_Qe)         # size of the central directory   8 bytes
                                                        # offset of start of central
                                                        # directory with respect to
      io << [start_of_central_directory_location].pack(C_Qe) # the starting disk number   8 bytes
                                                        # zip64 extensible data sector    (variable size), blank for us

      # [zip64 end of central directory locator]
      io << [0x07064b50].pack(C_V)                      # zip64 end of central dir locator
                                                        # signature                       4 bytes  (0x07064b50)
      io << [0].pack(C_V)                               # number of the disk with the
                                                        # start of the zip64 end of
                                                        # central directory               4 bytes
      io << [zip64_eocdr_offset].pack(C_Qe)             # relative offset of the zip64
                                                        # end of central directory record 8 bytes
                                                        # (note: "relative" is actually "from the start of the file")
      io << [1].pack(C_V)                               # total number of disks           4 bytes
    end

    # Then the end of central directory record:
    io << [0x06054b50].pack(C_V)                        # end of central dir signature     4 bytes  (0x06054b50)
    io << [0].pack(C_v)                                 # number of this disk              2 bytes
    io << [0].pack(C_v)                                 # number of the disk with the
                                                        # start of the central directory   2 bytes

    if zip64_required # the number of entries will be read from the zip64 part of the central directory
      io << [TWO_BYTE_MAX_UINT].pack(C_v)               # total number of entries in the
                                                        # central directory on this disk   2 bytes
      io << [TWO_BYTE_MAX_UINT].pack(C_v)               # total number of entries in
                                                        # the central directory            2 bytes
    else
      io << [num_files_in_archive].pack(C_v)            # total number of entries in the
                                                        # central directory on this disk   2 bytes
      io << [num_files_in_archive].pack(C_v)            # total number of entries in
                                                        # the central directory            2 bytes
    end

    if zip64_required
      io << [FOUR_BYTE_MAX_UINT].pack(C_V)              # size of the central directory    4 bytes
      io << [FOUR_BYTE_MAX_UINT].pack(C_V)              # offset of start of central
                                                        # directory with respect to
                                                        # the starting disk number         4 bytes
    else
      io << [central_directory_size].pack(C_V)          # size of the central directory    4 bytes
      io << [start_of_central_directory_location].pack(C_V) # offset of start of central
                                                        # directory with respect to
                                                        # the starting disk number         4 bytes
    end
    io << [comment.bytesize].pack(C_v)                  # .ZIP file comment length         2 bytes
    io << comment                                       # .ZIP file comment                (variable size)
  end

  private

  # Writes the Zip64 extra field for the local file header. Will be used by `write_local_file_header` when any sizes given to it warrant that.
  #
  # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
  # @param uncompressed_size[Fixnum] The size of the file once extracted
  # @return [String]
  def zip_64_extra_for_local_file_header(compressed_size:, uncompressed_size:)
    data_and_packspecs = [
      0x0001, C_v,                # 2 bytes    Tag for this "extra" block type
      16, C_v,                    # 2 bytes    Size of this "extra" block. For us it will always be 16 (2x8)
      uncompressed_size, C_Qe,    # 8 bytes    Original uncompressed file size
      compressed_size, C_Qe,      # 8 bytes    Size of compressed data
    ]
    pack_array(data_and_packspecs)
  end

  # Writes the extended timestamp information field. The spec defines 2
  # different formats - the one for the local file header can also accommodate the
  # atime and ctime, whereas the one for the central directory can only take
  # the mtime - and refers the reader to the local header extra to obtain the
  # remaining times. Since only the mtime is written, the same bytes are valid for both headers.
  #
  # @param mtime[Time] the modification time to record (it is not modified)
  # @return [String]
  def timestamp_extra(mtime)
    #         Local-header version:
    #
    #         Value         Size        Description
    #         -----         ----        -----------
    # (time)  0x5455        Short       tag for this extra block type ("UT")
    #         TSize         Short       total data size for this block
    #         Flags         Byte        info bits
    #         (ModTime)     Long        time of last modification (UTC/GMT)
    #         (AcTime)      Long        time of last access (UTC/GMT)
    #         (CrTime)      Long        time of original creation (UTC/GMT)
    #
    #         Central-header version:
    #
    #         Value         Size        Description
    #         -----         ----        -----------
    # (time)  0x5455        Short       tag for this extra block type ("UT")
    #         TSize         Short       total data size for this block
    #         Flags         Byte        info bits (refers to local header!)
    #         (ModTime)     Long        time of last modification (UTC/GMT)
    #
    # The lower three bits of Flags in both headers indicate which time-
    #       stamps are present in the LOCAL extra field:
    #
    #       bit 0           if set, modification time is present
    #       bit 1           if set, access time is present
    #       bit 2           if set, creation time is present
    #       bits 3-7        reserved for additional timestamps; not set
    flags = 0b00000001 # Set bit 0 (the lowest bit) only, to indicate that only mtime is present
    data_and_packspecs = [
      0x5455, C_v,       # tag for this extra block type ("UT")
      (1 + 4), C_v,      # the size of this block (1 byte used for the Flag + 1 long used for the timestamp)
      flags, C_C,        # encode a single byte
      # Use a signed little-endian long, not the unsigned one used by the rest of the ZIP spec.
      # Time#to_i is already the UTC epoch offset; Time#utc is deliberately avoided since it
      # would convert the caller's Time object in place and alter the DOS time/date written
      # by any later header for the same entry.
      mtime.to_i, C_le,
    ]
    pack_array(data_and_packspecs)
  end

  # Writes the Zip64 extra field for the central directory header. It differs from the extra used in the local file header because it
  # also contains the location of the local file header in the ZIP as an 8-byte int.
  #
  # @param compressed_size[Fixnum] The size of the compressed (or stored) data - how much space it uses in the ZIP
  # @param uncompressed_size[Fixnum] The size of the file once extracted
  # @param local_file_header_location[Fixnum] Byte offset of the start of the local file header from the beginning of the ZIP archive
  # @return [String]
  def zip_64_extra_for_central_directory_file_header(compressed_size:, uncompressed_size:, local_file_header_location:)
    data_and_packspecs = [
      0x0001, C_v,                        # 2 bytes    Tag for this "extra" block type
      28, C_v,                            # 2 bytes    Size of this "extra" block. For us it will always be 28
      uncompressed_size, C_Qe,            # 8 bytes    Original uncompressed file size
      compressed_size, C_Qe,              # 8 bytes    Size of compressed data
      local_file_header_location, C_Qe,   # 8 bytes    Offset of local header record
      0, C_V,                             # 4 bytes    Number of the disk on which this file starts
    ]
    pack_array(data_and_packspecs)
  end

  # Encodes the time-of-day part of `t` in the 2-byte MS-DOS format:
  # seconds divided by 2 in the low 5 bits, minutes shifted left by 5, hours shifted left by 11.
  def to_binary_dos_time(t)
    (t.sec / 2) + (t.min << 5) + (t.hour << 11)
  end

  # Encodes the calendar date of `t` in the 2-byte MS-DOS format:
  # day in the low 5 bits, month shifted left by 5, years since 1980 shifted left by 9.
  def to_binary_dos_date(t)
    t.day + (t.month << 5) + ((t.year - 1980) << 9)
  end

  # Unzips a given flat array of alternating "numeric value, pack specifier" items and then packs all
  # the values (items at even indices) using the specifiers (items at odd indices).
  # It is harder to explain than to show:
  #
  #   pack_array([1, 'V', 2, 'v', 148, 'v']) #=> "\x01\x00\x00\x00\x02\x00\x94\x00"
  #
  # will do the following two transforms:
  #
  #  [1, 'V', 2, 'v', 148, 'v'] -> [1,2,148], ['V','v','v'] -> [1,2,148].pack('Vvv') -> "\x01\x00\x00\x00\x02\x00\x94\x00"
  def pack_array(values_to_packspecs)
    values, packspecs = values_to_packspecs.partition.each_with_index { |_, i| i.even? }
    values.pack(packspecs.join)
  end
end