microzip.rb in zip_tricks-2.8.1

- old
+ new

@@ -11,10 +11,11 @@
 class ZipTricks::Microzip
   STORED   = 0
   DEFLATED = 8
 
   TooMuch = Class.new(StandardError)
+  PathError = Class.new(StandardError)
   DuplicateFilenames = Class.new(StandardError)
   UnknownMode = Class.new(StandardError)
   
   FOUR_BYTE_MAX_UINT = 0xFFFFFFFF
   TWO_BYTE_MAX_UINT = 0xFFFF
@@ -40,70 +41,30 @@
 
   C_V = 'V'.freeze
   C_v = 'v'.freeze
   C_Qe = 'Q<'.freeze
 
-  module Bytesize
-    def bytesize_of
-      ''.force_encoding(Encoding::BINARY).tap {|b| yield(b) }.bytesize
-    end
-  end
-  include Bytesize
-  
   class Entry < Struct.new(:filename, :crc32, :compressed_size, :uncompressed_size, :storage_mode, :mtime)
-    include Bytesize
     def initialize(*)
       super
+      self.filename = filename.dup.force_encoding(Encoding::UTF_8) # private copy: never retag (or choke on a frozen) caller string
+      @requires_efs_flag = !filename.ascii_only?                    # any non-ASCII byte means the name must be flagged as UTF-8
       @requires_zip64 = (compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT)
-      if filename.bytesize > TWO_BYTE_MAX_UINT
-        raise TooMuch, "The given filename is too long to fit (%d bytes)" % filename.bytesize
-      end
+      raise TooMuch, "Filename is too long" if filename.bytesize > TWO_BYTE_MAX_UINT
+      raise PathError, "Paths in ZIP may only contain forward slashes (UNIX separators)" if filename.include?('\\')
     end
 
     def requires_zip64?
       @requires_zip64
     end
     
     # Set the general purpose flags for the entry. The only flag we care about is the EFS
     # bit (bit 11) which should be set if the filename is UTF8. If it is, we need to set the
     # bit so that the unarchiving application knows that the filename in the archive is UTF-8
     # encoded, and not some DOS default. For ASCII entries it does not matter.
-    #
-    # Now, strictly speaking, if a diacritic-containing character (such as å) does fit into the DOS-437
-    # codepage, it should be encodable as such. This would, in theory, let older Windows tools
-    # decode the filename correctly. However, this kills the filename decoding for the OSX builtin
-    # archive utility (it assumes the filename to be UTF-8, regardless). So if we allow filenames
-    # to be encoded in DOS-437, we _potentially_ have support in Windows but we upset everyone on Mac.
-    # If we just use UTF-8 and set the right EFS bit in general purpose flags, we upset Windows users
-    # because most of the Windows unarchive tools (at least the builtin ones) do not give a flying eff
-    # about the EFS support bit being set.
-    #
-    # Additionally, if we use Unarchiver on OSX (which is our recommended unpacker for large files),
-    # it will (very rightfully) ask us how we should decode each filename that does not have the EFS bit,
-    # but does contain something non-ASCII-decodable. This is horrible UX for users.
-    #
-    # So, basically, we have 2 choices, for filenames containing diacritics (for bona-fide UTF-8 you do not
-    # even get those choices, you _have_ to use UTF-8):
-    #
-    # * Make life easier for Windows users by setting stuff to DOS, not care about the standard _and_ make
-    #   most of Mac users upset
-    # * Make life easy for Mac users and conform to the standard, and tell Windows users to get a _decent_
-    #   ZIP unarchiving tool.
-    #
-    # We are going with option 2, and this is well-thought-out. Trust me. If you want the crazytown
-    # filename encoding scheme that is described here http://stackoverflow.com/questions/13261347
-    # you can try this:
-    #
-    #  [Encoding::CP437, Encoding::ISO_8859_1, Encoding::UTF_8]
-    #
-    # We don't want no such thing, and sorry Windows users, you are going to need a decent unarchiver
-    # that honors the standard. Alas, alas.
     def gp_flags_based_on_filename
-      filename.encode(Encoding::ASCII)
-      0b00000000000
-    rescue EncodingError
-      0b00000000000 | 0b100000000000
+      @requires_efs_flag ? (1 << 11) : 0 # bit 11 is the EFS (UTF-8 filename) flag; no flags otherwise
     end
 
     def write_local_file_header(io)
       # TBD: caveat. If this entry _does_ fit into a standard zip segment (both compressed and
       # uncompressed size at or below 0xFFFF etc), but it is _located_ at an offset that requires
@@ -210,13 +171,20 @@
         }
       end
       io << [extra_size].pack(C_v)                        # extra field length              2 bytes
 
       io << [0].pack(C_v)                                 # file comment length             2 bytes
-      io << [0].pack(C_v)                                 # disk number start               2 bytes
-      io << [0].pack(C_v)                                 # internal file attributes        2 bytes
       
+      # For The Unarchiver < 3.11.1 this field has to be set to the overflow value if zip64 is used
+      # because otherwise it does not properly advance the pointer when reading the Zip64 extra field
+      # https://bitbucket.org/WAHa_06x36/theunarchiver/pull-requests/2/bug-fix-for-zip64-extra-field-parser/diff
+      if @requires_zip64
+        io << [TWO_BYTE_MAX_UINT].pack(C_v)               # disk number start               2 bytes
+      else
+        io << [0].pack(C_v)                               # disk number start               2 bytes
+      end
+      io << [0].pack(C_v)                                # internal file attributes        2 bytes
       io << [DEFAULT_EXTERNAL_ATTRS].pack(C_V)           # external file attributes        4 bytes
 
       if @requires_zip64
         io << [FOUR_BYTE_MAX_UINT].pack(C_V)             # relative offset of local header 4 bytes
       else
@@ -230,10 +198,14 @@
                                                           # file comment (variable size)
     end
 
     private
 
+    def bytesize_of
+      String.new.tap { |buffer| yield(buffer) }.bytesize # bytes the block appended to a fresh binary scratch buffer
+    end
+
     def to_binary_dos_time(t)
       (t.sec/2) + (t.min << 5) + (t.hour << 11)
     end
 
     def to_binary_dos_date(t)
@@ -311,13 +283,13 @@
                                                              # central directory               8 bytes
       io << [central_dir_size].pack(C_Qe)                    # size of the central directory   8 bytes
                                                              # offset of start of central
                                                              # directory with respect to
       io << [start_of_central_directory].pack(C_Qe)          # the starting disk number        8 bytes
-                                                              # zip64 extensible data sector    (variable size)
+                                                             # zip64 extensible data sector    (variable size), blank for us
 
       # [zip64 end of central directory locator]
-      io << [0x07064b50].pack("V")                           # zip64 end of central dir locator
+      io << [0x07064b50].pack(C_V)                           # zip64 end of central dir locator
                                                              # signature                       4 bytes  (0x07064b50)
       io << [0].pack(C_V)                                    # number of the disk with the
                                                              # start of the zip64 end of
                                                              # central directory               4 bytes
       io << [zip64_eocdr_offset].pack(C_Qe)                  # relative offset of the zip64