# Detects a file's type from its leading "magic" bytes.
#
# The signature list is ported from https://github.com/sindresorhus/file-type
# and the order of the checks is significant: more specific container formats
# are tested before the generic formats they are built on.
module MagicBytes
  VERSION = '1.0.1'

  # Result of a detection: a file extension (without the dot) and a MIME type.
  class FileType < Struct.new(:ext, :mime)
  end

  extend self

  # Performs detection from a given IO or File.
  #
  # @param io[#read] a readable object
  # @return [FileType] the detected type; when nothing matches (or nothing
  #   could be read) the generic `bin` / `binary/octet-stream` type is returned
  def read_and_detect(io)
    n_bytes = 262 # The maximum length supported (needed for .tar archives)
    detect(io.read(n_bytes))
  end

  # Performs detection from the first bytes of a file.
  #
  # @param header_bytes[String, nil] the header bytes of the file. `nil`
  #   (which is what `IO#read(n)` returns at EOF) is treated as an empty header.
  # @return [FileType] the detected type; when nothing matches the generic
  #   `bin` / `binary/octet-stream` type is returned
  def detect(header_bytes)
    d = _detect(header_bytes)
    FileType.new(d.fetch(:ext), d.fetch(:mime))
  end

  private

  # Maps header bytes to a Hash with :ext and :mime keys.
  #
  # Reads past the end of a short header yield nil, which never equals an
  # expected byte, so truncated input simply fails to match.
  def _detect(header_bytes)
    # `to_s` turns a nil header (empty IO) into "" instead of raising.
    buf = header_bytes.to_s.unpack('C*')

    # True when the bytes starting at `offset` equal `expected` one by one.
    sig = lambda do |offset, *expected|
      expected.each_with_index.all? { |byte, i| buf[offset + i] == byte }
    end
    # Same as `sig`, with the expected bytes given as an ASCII string.
    text = ->(offset, string) { sig.(offset, *string.bytes) }

    return { ext: 'jpg', mime: 'image/jpeg' } if sig.(0, 0xFF, 0xD8, 0xFF)
    return { ext: 'png', mime: 'image/png' } if sig.(0, 0x89, 0x50, 0x4E, 0x47)
    return { ext: 'gif', mime: 'image/gif' } if sig.(0, 0x47, 0x49, 0x46)
    return { ext: 'webp', mime: 'image/webp' } if sig.(8, 0x57, 0x45, 0x42, 0x50)

    tiff_header = sig.(0, 0x49, 0x49, 0x2A, 0x00) || sig.(0, 0x4D, 0x4D, 0x00, 0x2A)

    # needs to be before `tif`
    return { ext: 'cr2', mime: 'image/x-canon-cr2' } if tiff_header && sig.(8, 0x43, 0x52)
    return { ext: 'tif', mime: 'image/tiff' } if tiff_header

    return { ext: 'bmp', mime: 'image/bmp' } if sig.(0, 0x42, 0x4D)
    return { ext: 'jxr', mime: 'image/vnd.ms-photo' } if sig.(0, 0x49, 0x49, 0xBC)
    return { ext: 'psd', mime: 'image/vnd.adobe.photoshop' } if sig.(0, 0x38, 0x42, 0x50, 0x53)

    zip_local_header = sig.(0, 0x50, 0x4B, 0x03, 0x04)

    # needs to be before `zip`
    if zip_local_header && text.(30, 'mimetypeapplication/epub+zip')
      return { ext: 'epub', mime: 'application/epub+zip' }
    end

    # needs to be before `zip`
    # assumes signed.xpi from addons.mozilla.org
    if zip_local_header && text.(30, 'META-INF/mozilla.rsa')
      return { ext: 'xpi', mime: 'application/x-xpinstall' }
    end

    if sig.(0, 0x50, 0x4B) && [0x03, 0x05, 0x07].include?(buf[2]) && [0x04, 0x06, 0x08].include?(buf[3])
      return { ext: 'zip', mime: 'application/zip' }
    end

    return { ext: 'tar', mime: 'application/x-tar' } if text.(257, 'ustar')

    if sig.(0, 0x52, 0x61, 0x72, 0x21, 0x1A, 0x07) && [0x00, 0x01].include?(buf[6])
      return { ext: 'rar', mime: 'application/x-rar-compressed' }
    end

    return { ext: 'gz', mime: 'application/gzip' } if sig.(0, 0x1F, 0x8B, 0x08)
    return { ext: 'bz2', mime: 'application/x-bzip2' } if sig.(0, 0x42, 0x5A, 0x68)
    return { ext: '7z', mime: 'application/x-7z-compressed' } if sig.(0, 0x37, 0x7A, 0xBC, 0xAF, 0x27, 0x1C)
    return { ext: 'dmg', mime: 'application/x-apple-diskimage' } if sig.(0, 0x78, 0x01)

    # 00 00 00 1C followed by "ftyp" is shared by several mp4 variants and by m4v.
    ftyp_1c = sig.(0, 0x00, 0x00, 0x00, 0x1C) && text.(4, 'ftyp')

    if (sig.(0, 0x00, 0x00, 0x00) && [0x18, 0x20].include?(buf[3]) && text.(4, 'ftyp')) ||
       sig.(0, 0x33, 0x67, 0x70, 0x35) ||
       (ftyp_1c && text.(8, 'mp42') && text.(16, 'mp41mp42isom')) ||
       (ftyp_1c && text.(8, 'isom')) ||
       (ftyp_1c && text.(8, 'mp42') && sig.(12, 0x00, 0x00, 0x00, 0x00))
      return { ext: 'mp4', mime: 'video/mp4' }
    end

    return { ext: 'm4v', mime: 'video/x-m4v' } if ftyp_1c && text.(8, 'M4V')
    return { ext: 'mid', mime: 'audio/midi' } if sig.(0, 0x4D, 0x54, 0x68, 0x64)

    # needs to be before the `webm`
    return { ext: 'mkv', mime: 'video/x-matroska' } if text.(31, 'matroska')
    return { ext: 'webm', mime: 'video/webm' } if sig.(0, 0x1A, 0x45, 0xDF, 0xA3)

    return { ext: 'mov', mime: 'video/quicktime' } if sig.(0, 0x00, 0x00, 0x00, 0x14) && text.(4, 'ftyp')

    riff = sig.(0, 0x52, 0x49, 0x46, 0x46)
    return { ext: 'avi', mime: 'video/x-msvideo' } if riff && sig.(8, 0x41, 0x56, 0x49)

    if sig.(0, 0x30, 0x26, 0xB2, 0x75, 0x8E, 0x66, 0xCF, 0x11, 0xA6, 0xD9)
      return { ext: 'wmv', mime: 'video/x-ms-wmv' }
    end

    # Upstream tests `buf[3].toString(16)[0] == 'b'`, i.e. any start code in
    # 0xB0..0xBF (pack header 0xBA, sequence header 0xB3, ...), not only 0xBA.
    if sig.(0, 0x00, 0x00, 0x01) && (buf[3].to_i >> 4) == 0xB
      return { ext: 'mpg', mime: 'video/mpeg' }
    end

    return { ext: 'mp3', mime: 'audio/mpeg' } if sig.(0, 0x49, 0x44, 0x33) || sig.(0, 0xFF, 0xFB)

    if text.(4, 'ftypM4A') || sig.(0, 0x4D, 0x34, 0x41, 0x20)
      return { ext: 'm4a', mime: 'audio/m4a' }
    end

    # needs to be before `ogg`
    return { ext: 'opus', mime: 'audio/opus' } if text.(28, 'OpusHead')
    return { ext: 'ogg', mime: 'audio/ogg' } if sig.(0, 0x4F, 0x67, 0x67, 0x53)

    return { ext: 'flac', mime: 'audio/x-flac' } if sig.(0, 0x66, 0x4C, 0x61, 0x43)
    return { ext: 'wav', mime: 'audio/x-wav' } if riff && sig.(8, 0x57, 0x41, 0x56, 0x45)
    return { ext: 'amr', mime: 'audio/amr' } if sig.(0, 0x23, 0x21, 0x41, 0x4D, 0x52, 0x0A)
    return { ext: 'pdf', mime: 'application/pdf' } if sig.(0, 0x25, 0x50, 0x44, 0x46)
    return { ext: 'exe', mime: 'application/x-msdownload' } if sig.(0, 0x4D, 0x5A)

    if [0x43, 0x46].include?(buf[0]) && sig.(1, 0x57, 0x53)
      return { ext: 'swf', mime: 'application/x-shockwave-flash' }
    end

    return { ext: 'rtf', mime: 'application/rtf' } if sig.(0, 0x7B, 0x5C, 0x72, 0x74, 0x66)

    # WOFF and WOFF2 wrap either a TrueType (00 01 00 00) or an OpenType ("OTTO") flavor.
    woff_flavor = sig.(4, 0x00, 0x01, 0x00, 0x00) || sig.(4, 0x4F, 0x54, 0x54, 0x4F)
    return { ext: 'woff', mime: 'application/font-woff' } if sig.(0, 0x77, 0x4F, 0x46, 0x46) && woff_flavor
    return { ext: 'woff2', mime: 'application/font-woff' } if sig.(0, 0x77, 0x4F, 0x46, 0x32) && woff_flavor

    if sig.(34, 0x4C, 0x50) &&
       (sig.(8, 0x00, 0x00, 0x01) || sig.(8, 0x01, 0x00, 0x02) || sig.(8, 0x02, 0x00, 0x02))
      return { ext: 'eot', mime: 'application/octet-stream' }
    end

    return { ext: 'ttf', mime: 'application/font-sfnt' } if sig.(0, 0x00, 0x01, 0x00, 0x00, 0x00)
    return { ext: 'otf', mime: 'application/font-sfnt' } if sig.(0, 0x4F, 0x54, 0x54, 0x4F, 0x00)
    return { ext: 'ico', mime: 'image/x-icon' } if sig.(0, 0x00, 0x00, 0x01, 0x00)
    return { ext: 'flv', mime: 'video/x-flv' } if sig.(0, 0x46, 0x4C, 0x56, 0x01)
    return { ext: 'ps', mime: 'application/postscript' } if sig.(0, 0x25, 0x21)
    return { ext: 'xz', mime: 'application/x-xz' } if sig.(0, 0xFD, 0x37, 0x7A, 0x58, 0x5A, 0x00)
    return { ext: 'sqlite', mime: 'application/x-sqlite3' } if sig.(0, 0x53, 0x51, 0x4C, 0x69)
    return { ext: 'nes', mime: 'application/x-nintendo-nes-rom' } if sig.(0, 0x4E, 0x45, 0x53, 0x1A)
    return { ext: 'crx', mime: 'application/x-google-chrome-extension' } if sig.(0, 0x43, 0x72, 0x32, 0x34)

    if sig.(0, 0x4D, 0x53, 0x43, 0x46) || sig.(0, 0x49, 0x53, 0x63, 0x28)
      return { ext: 'cab', mime: 'application/vnd.ms-cab-compressed' }
    end

    # needs to be before `ar`
    return { ext: 'deb', mime: 'application/x-deb' } if text.(0, "!<arch>\ndebian-binary")
    return { ext: 'ar', mime: 'application/x-unix-archive' } if text.(0, '!<arch>')

    return { ext: 'rpm', mime: 'application/x-rpm' } if sig.(0, 0xED, 0xAB, 0xEE, 0xDB)
    return { ext: 'Z', mime: 'application/x-compress' } if sig.(0, 0x1F, 0xA0) || sig.(0, 0x1F, 0x9D)
    return { ext: 'lz', mime: 'application/x-lzip' } if sig.(0, 0x4C, 0x5A, 0x49, 0x50)

    if sig.(0, 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1)
      return { ext: 'msi', mime: 'application/x-msi' }
    end

    # Nothing matched: report opaque binary data.
    { ext: 'bin', mime: 'binary/octet-stream' }
  end
end