lib/zip_tricks/microzip.rb in zip_tricks-2.8.0 vs lib/zip_tricks/microzip.rb in zip_tricks-2.8.1

- old
+ new

@@ -11,10 +11,11 @@ class ZipTricks::Microzip STORED = 0 DEFLATED = 8 TooMuch = Class.new(StandardError) + PathError = Class.new(StandardError) DuplicateFilenames = Class.new(StandardError) UnknownMode = Class.new(StandardError) FOUR_BYTE_MAX_UINT = 0xFFFFFFFF TWO_BYTE_MAX_UINT = 0xFFFF @@ -40,70 +41,30 @@ C_V = 'V'.freeze C_v = 'v'.freeze C_Qe = 'Q<'.freeze - module Bytesize - def bytesize_of - ''.force_encoding(Encoding::BINARY).tap {|b| yield(b) }.bytesize - end - end - include Bytesize - class Entry < Struct.new(:filename, :crc32, :compressed_size, :uncompressed_size, :storage_mode, :mtime) - include Bytesize def initialize(*) super + filename.force_encoding(Encoding::UTF_8) + @requires_efs_flag = !(filename.encode(Encoding::ASCII) rescue false) @requires_zip64 = (compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT) - if filename.bytesize > TWO_BYTE_MAX_UINT - raise TooMuch, "The given filename is too long to fit (%d bytes)" % filename.bytesize - end + raise TooMuch, "Filename is too long" if filename.bytesize > TWO_BYTE_MAX_UINT + raise PathError, "Paths in ZIP may only contain forward slashes (UNIX separators)" if filename.include?('\\') end def requires_zip64? @requires_zip64 end # Set the general purpose flags for the entry. The only flag we care about is the EFS # bit (bit 11) which should be set if the filename is UTF8. If it is, we need to set the # bit so that the unarchiving application knows that the filename in the archive is UTF-8 # encoded, and not some DOS default. For ASCII entries it does not matter. - # - # Now, strictly speaking, if a diacritic-containing character (such as å) does fit into the DOS-437 - # codepage, it should be encodable as such. This would, in theory, let older Windows tools - # decode the filename correctly. However, this kills the filename decoding for the OSX builtin - # archive utility (it assumes the filename to be UTF-8, regardless). So if we allow filenames - # to be encoded in DOS-437, we _potentially_ have support in Windows but we upset everyone on Mac. - # If we just use UTF-8 and set the right EFS bit in general purpose flags, we upset Windows users - # because most of the Windows unarchive tools (at least the builtin ones) do not give a flying eff - # about the EFS support bit being set. - # - # Additionally, if we use Unarchiver on OSX (which is our recommended unpacker for large files), - # it will (very rightfully) ask us how we should decode each filename that does not have the EFS bit, - # but does contain something non-ASCII-decodable. This is horrible UX for users. - # - # So, basically, we have 2 choices, for filenames containing diacritics (for bona-fide UTF-8 you do not - # even get those choices, you _have_ to use UTF-8): - # - # * Make life easier for Windows users by setting stuff to DOS, not care about the standard _and_ make - # most of Mac users upset - # * Make life easy for Mac users and conform to the standard, and tell Windows users to get a _decent_ - # ZIP unarchiving tool. - # - # We are going with option 2, and this is well-thought-out. Trust me. If you want the crazytown - # filename encoding scheme that is described here http://stackoverflow.com/questions/13261347 - # you can try this: - # - # [Encoding::CP437, Encoding::ISO_8859_1, Encoding::UTF_8] - # - # We don't want no such thing, and sorry Windows users, you are going to need a decent unarchiver - # that honors the standard. Alas, alas. def gp_flags_based_on_filename - filename.encode(Encoding::ASCII) - 0b00000000000 - rescue EncodingError - 0b00000000000 | 0b100000000000 + @requires_efs_flag ? (0b00000000000 | 0b100000000000) : 0b00000000000 end def write_local_file_header(io) # TBD: caveat. If this entry _does_ fit into a standard zip segment (both compressed and # uncompressed size at or below 0xFFFF etc), but it is _located_ at an offset that requires @@ -210,13 +171,20 @@ } end io << [extra_size].pack(C_v) # extra field length 2 bytes io << [0].pack(C_v) # file comment length 2 bytes - io << [0].pack(C_v) # disk number start 2 bytes - io << [0].pack(C_v) # internal file attributes 2 bytes + # For The Unarchiver < 3.11.1 this field has to be set to the overflow value if zip64 is used + # because otherwise it does not properly advance the pointer when reading the Zip64 extra field + # https://bitbucket.org/WAHa_06x36/theunarchiver/pull-requests/2/bug-fix-for-zip64-extra-field-parser/diff + if @requires_zip64 + io << [TWO_BYTE_MAX_UINT].pack(C_v) # disk number start 2 bytes + else + io << [0].pack(C_v) # disk number start 2 bytes + end + io << [0].pack(C_v) # internal file attributes 2 bytes io << [DEFAULT_EXTERNAL_ATTRS].pack(C_V) # external file attributes 4 bytes if @requires_zip64 io << [FOUR_BYTE_MAX_UINT].pack(C_V) # relative offset of local header 4 bytes else @@ -230,10 +198,14 @@ # file comment (variable size) end private + def bytesize_of + ''.force_encoding(Encoding::BINARY).tap {|b| yield(b) }.bytesize + end + def to_binary_dos_time(t) (t.sec/2) + (t.min << 5) + (t.hour << 11) end def to_binary_dos_date(t) @@ -311,13 +283,13 @@ # central directory 8 bytes io << [central_dir_size].pack(C_Qe) # size of the central directory 8 bytes # offset of start of central # directory with respect to io << [start_of_central_directory].pack(C_Qe) # the starting disk number 8 bytes - # zip64 extensible data sector (variable size) + # zip64 extensible data sector (variable size), blank for us # [zip64 end of central directory locator] - io << [0x07064b50].pack("V") # zip64 end of central dir locator + io << [0x07064b50].pack(C_V) # zip64 end of central dir locator # signature 4 bytes (0x07064b50) io << [0].pack(C_V) # number of the disk with the # start of the zip64 end of # central directory 4 bytes io << [zip64_eocdr_offset].pack(C_Qe) # relative offset of the zip64