lib/zip_tricks/microzip.rb in zip_tricks-2.8.0 vs lib/zip_tricks/microzip.rb in zip_tricks-2.8.1
- old
+ new
@@ -11,10 +11,11 @@
class ZipTricks::Microzip
STORED = 0
DEFLATED = 8
TooMuch = Class.new(StandardError)
+ PathError = Class.new(StandardError)
DuplicateFilenames = Class.new(StandardError)
UnknownMode = Class.new(StandardError)
FOUR_BYTE_MAX_UINT = 0xFFFFFFFF
TWO_BYTE_MAX_UINT = 0xFFFF
@@ -40,70 +41,30 @@
C_V = 'V'.freeze
C_v = 'v'.freeze
C_Qe = 'Q<'.freeze
- module Bytesize
- def bytesize_of
- ''.force_encoding(Encoding::BINARY).tap {|b| yield(b) }.bytesize
- end
- end
- include Bytesize
-
class Entry < Struct.new(:filename, :crc32, :compressed_size, :uncompressed_size, :storage_mode, :mtime)
- include Bytesize
def initialize(*)
super
+ filename.force_encoding(Encoding::UTF_8)
+ @requires_efs_flag = !(filename.encode(Encoding::ASCII) rescue false)
@requires_zip64 = (compressed_size > FOUR_BYTE_MAX_UINT || uncompressed_size > FOUR_BYTE_MAX_UINT)
- if filename.bytesize > TWO_BYTE_MAX_UINT
- raise TooMuch, "The given filename is too long to fit (%d bytes)" % filename.bytesize
- end
+ raise TooMuch, "Filename is too long" if filename.bytesize > TWO_BYTE_MAX_UINT
+ raise PathError, "Paths in ZIP may only contain forward slashes (UNIX separators)" if filename.include?('\\')
end
# Tells whether this entry needs Zip64 records. The flag is computed once in
# initialize and is true when compressed_size or uncompressed_size exceeds
# FOUR_BYTE_MAX_UINT.
def requires_zip64?
@requires_zip64
end
# Set the general purpose flags for the entry. The only flag we care about is the EFS
# bit (bit 11) which should be set if the filename is UTF8. If it is, we need to set the
# bit so that the unarchiving application knows that the filename in the archive is UTF-8
# encoded, and not some DOS default. For ASCII entries it does not matter.
- #
- # Now, strictly speaking, if a diacritic-containing character (such as å) does fit into the DOS-437
- # codepage, it should be encodable as such. This would, in theory, let older Windows tools
- # decode the filename correctly. However, this kills the filename decoding for the OSX builtin
- # archive utility (it assumes the filename to be UTF-8, regardless). So if we allow filenames
- # to be encoded in DOS-437, we _potentially_ have support in Windows but we upset everyone on Mac.
- # If we just use UTF-8 and set the right EFS bit in general purpose flags, we upset Windows users
- # because most of the Windows unarchive tools (at least the builtin ones) do not give a flying eff
- # about the EFS support bit being set.
- #
- # Additionally, if we use Unarchiver on OSX (which is our recommended unpacker for large files),
- # it will (very rightfully) ask us how we should decode each filename that does not have the EFS bit,
- # but does contain something non-ASCII-decodable. This is horrible UX for users.
- #
- # So, basically, we have 2 choices, for filenames containing diacritics (for bona-fide UTF-8 you do not
- # even get those choices, you _have_ to use UTF-8):
- #
- # * Make life easier for Windows users by setting stuff to DOS, not care about the standard _and_ make
- # most of Mac users upset
- # * Make life easy for Mac users and conform to the standard, and tell Windows users to get a _decent_
- # ZIP unarchiving tool.
- #
- # We are going with option 2, and this is well-thought-out. Trust me. If you want the crazytown
- # filename encoding scheme that is described here http://stackoverflow.com/questions/13261347
- # you can try this:
- #
- # [Encoding::CP437, Encoding::ISO_8859_1, Encoding::UTF_8]
- #
- # We don't want no such thing, and sorry Windows users, you are going to need a decent unarchiver
- # that honors the standard. Alas, alas.
def gp_flags_based_on_filename
- filename.encode(Encoding::ASCII)
- 0b00000000000
- rescue EncodingError
- 0b00000000000 | 0b100000000000
+ @requires_efs_flag ? (0b00000000000 | 0b100000000000) : 0b00000000000
end
def write_local_file_header(io)
# TBD: caveat. If this entry _does_ fit into a standard zip segment (both compressed and
# uncompressed size at or below 0xFFFF etc), but it is _located_ at an offset that requires
@@ -210,13 +171,20 @@
}
end
io << [extra_size].pack(C_v) # extra field length 2 bytes
io << [0].pack(C_v) # file comment length 2 bytes
- io << [0].pack(C_v) # disk number start 2 bytes
- io << [0].pack(C_v) # internal file attributes 2 bytes
+ # For The Unarchiver < 3.11.1 this field has to be set to the overflow value if zip64 is used
+ # because otherwise it does not properly advance the pointer when reading the Zip64 extra field
+ # https://bitbucket.org/WAHa_06x36/theunarchiver/pull-requests/2/bug-fix-for-zip64-extra-field-parser/diff
+ if @requires_zip64
+ io << [TWO_BYTE_MAX_UINT].pack(C_v) # disk number start 2 bytes
+ else
+ io << [0].pack(C_v) # disk number start 2 bytes
+ end
+ io << [0].pack(C_v) # internal file attributes 2 bytes
io << [DEFAULT_EXTERNAL_ATTRS].pack(C_V) # external file attributes 4 bytes
if @requires_zip64
io << [FOUR_BYTE_MAX_UINT].pack(C_V) # relative offset of local header 4 bytes
else
@@ -230,10 +198,14 @@
# file comment (variable size)
end
private
+ def bytesize_of
+ ''.force_encoding(Encoding::BINARY).tap {|b| yield(b) }.bytesize
+ end
+
# Packs the time-of-day of +t+ into a single integer: the low bits carry the
# seconds halved (two-second resolution), the minutes start at bit 5 and the
# hours start at bit 11.
def to_binary_dos_time(t)
  hour_field = t.hour << 11
  minute_field = t.min << 5
  halved_seconds = t.sec / 2
  hour_field + minute_field + halved_seconds
end
def to_binary_dos_date(t)
@@ -311,13 +283,13 @@
# central directory 8 bytes
io << [central_dir_size].pack(C_Qe) # size of the central directory 8 bytes
# offset of start of central
# directory with respect to
io << [start_of_central_directory].pack(C_Qe) # the starting disk number 8 bytes
- # zip64 extensible data sector (variable size)
+ # zip64 extensible data sector (variable size), blank for us
# [zip64 end of central directory locator]
- io << [0x07064b50].pack("V") # zip64 end of central dir locator
+ io << [0x07064b50].pack(C_V) # zip64 end of central dir locator
# signature 4 bytes (0x07064b50)
io << [0].pack(C_V) # number of the disk with the
# start of the zip64 end of
# central directory 4 bytes
io << [zip64_eocdr_offset].pack(C_Qe) # relative offset of the zip64