Sha256: 085ed311584f854c018c19287b55ef0739e898d62e2c4ae7d6ed245551e0e2ac
Contents?: true
Size: 1.93 KB
Versions: 3
Compression:
Stored size: 1.93 KB
Contents
# Parser for ZIP files. Reads the ZIP structure and reports it as a
# FormatParser::Archive, either as a plain :zip archive or as a :document
# when the set of entry filenames is recognised as an office document.
class FormatParser::ZIPParser
  # Set is used in #call; requiring it explicitly is harmless when it has
  # already been loaded and keeps this file self-sufficient on Rubies that
  # do not autoload it.
  require 'set'
  require_relative 'zip_parser/file_reader'
  require_relative 'zip_parser/office_formats'
  include OfficeFormats

  # Mask for the EFS bit (bit 11) of the general-purpose flags of an entry.
  # When the bit is set the entry filename can be treated as UTF-8.
  EFS_FLAG_MASK = 0b100000000000

  # Parses the given IO as a ZIP.
  #
  # @param io the input to read from; it gets wrapped in a
  #   FormatParser::IOConstraint before reading
  # @return [FormatParser::Archive, nil] the archive description, or nil
  #   when the reader raises FileReader::Error (not a ZIP, or a broken ZIP)
  def call(io)
    io = FormatParser::IOConstraint.new(io)
    reader = FileReader.new
    entries = reader.read_zip_structure(io: io)

    # Collect the decoded filenames while building the entry list, so that
    # office document detection can work off the same decoded names.
    filenames_set = Set.new
    entries_archive = entries.map do |ze|
      ft = directory?(ze) ? :directory : :file
      decoded_filename = decode_filename_of(ze)
      filenames_set << decoded_filename
      FormatParser::Archive::Entry.new(type: ft, size: ze.uncompressed_size, filename: decoded_filename)
    end

    # NOTE(review): office_document? and office_file_format_from_entry_set
    # are presumably provided by the included OfficeFormats module.
    if office_document?(filenames_set)
      office_format = office_file_format_from_entry_set(filenames_set)
      FormatParser::Archive.new(nature: :document, format: office_format, entries: entries_archive)
    else
      FormatParser::Archive.new(nature: :archive, format: :zip, entries: entries_archive)
    end
  rescue FileReader::Error
    # This is not a ZIP, or a broken ZIP.
    nil
  end

  # Tells whether the ZIP entry is a directory.
  #
  # @param zip_entry an entry responding to #filename
  # @return [Boolean] true when the raw filename ends with a slash
  def directory?(zip_entry)
    # We could parse out the individual bit fields from the external
    # attributes and check the OS type recorded in the entry to see whether
    # it can be interpreted as UNIX or not. Instead, we will be frugal and
    # only look at the trailing slash.
    zip_entry.filename.end_with?('/')
  end

  # Converts a raw entry filename into a UTF-8 string.
  #
  # @param filename [String] the filename bytes as stored in the entry
  # @param likely_unicode [Boolean] whether the bytes can be treated as UTF-8
  # @return the result of FormatParser.string_to_lossy_utf8 for the filename
  def decode_filename(filename, likely_unicode:)
    # Retag a copy: force_encoding works in place, and calling it on the
    # argument would silently change the encoding of the caller's string
    # (and raise if that string is frozen).
    filename = filename.dup.force_encoding(Encoding::UTF_8) if likely_unicode
    FormatParser.string_to_lossy_utf8(filename)
  end

  # Decodes the filename of a ZIP entry, using its general-purpose flags to
  # decide whether the filename can be treated as UTF-8.
  #
  # @param zip_entry an entry responding to #gp_flags and #filename
  # @return the decoded filename, see #decode_filename
  def decode_filename_of(zip_entry)
    # Check for the EFS bit in the general-purpose flags. If it is set,
    # the entry filename can be treated as UTF-8
    efs_bit_set = (zip_entry.gp_flags & EFS_FLAG_MASK) == EFS_FLAG_MASK
    decode_filename(zip_entry.filename, likely_unicode: efs_bit_set)
  end

  FormatParser.register_parser self, natures: [:archive, :document], formats: :zip
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
format_parser-0.13.2 | lib/parsers/zip_parser.rb |
format_parser-0.13.1 | lib/parsers/zip_parser.rb |
format_parser-0.13.0 | lib/parsers/zip_parser.rb |