ext/libmspack/mspack/chmd.c in libmspack-0.10.1.2 vs ext/libmspack/mspack/chmd.c in libmspack-0.11.0
- old
+ new
@@ -1,7 +1,7 @@
/* This file is part of libmspack.
- * (C) 2003-2018 Stuart Caie.
+ * (C) 2003-2023 Stuart Caie.
*
* libmspack is free software; you can redistribute it and/or modify it under
* the terms of the GNU Lesser General Public License (LGPL) version 2.1
*
* For further details, see the file COPYING.LIB distributed with libmspack
@@ -56,10 +56,12 @@
static int chmd_error(
struct mschm_decompressor *base);
static int read_off64(
off_t *var, unsigned char *mem, struct mspack_system *sys,
struct mspack_file *fh);
+static off_t read_encint(
+ const unsigned char **p, const unsigned char *end, int *err);
/* filenames of the system files used for decompression.
* Content and ControlData are essential.
* ResetTable is preferred, but SpanInfo can be used if not available
*/
@@ -247,28 +249,19 @@
/* {7C01FD11-7BAA-11D0-9E0C-00A0-C922-E6EC} */
0x11, 0xFD, 0x01, 0x7C, 0xAA, 0x7B, 0xD0, 0x11,
0x9E, 0x0C, 0x00, 0xA0, 0xC9, 0x22, 0xE6, 0xEC
};
-/* reads an encoded integer into a variable; 7 bits of data per byte,
- * the high bit is used to indicate that there is another byte */
-#define READ_ENCINT(var) do { \
- (var) = 0; \
- do { \
- if (p >= end) goto chunk_end; \
- (var) = ((var) << 7) | (*p & 0x7F); \
- } while (*p++ & 0x80); \
-} while (0)
-
static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh,
struct mschmd_header *chm, int entire)
{
- unsigned int section, name_len, x, errors, num_chunks;
- unsigned char buf[0x54], *chunk = NULL, *name, *p, *end;
+ unsigned int errors, num_chunks;
+ unsigned char buf[0x54], *chunk = NULL;
+ const unsigned char *name, *p, *end;
struct mschmd_file *fi, *link = NULL;
- off_t offset, length;
- int num_entries;
+ off_t offset_hs0, filelen;
+ int num_entries, err = 0;
/* initialise pointers */
chm->files = NULL;
chm->sysfiles = NULL;
chm->chunk_cache = NULL;
@@ -310,19 +303,19 @@
}
/* chmhst3_OffsetCS0 does not exist in version 1 or 2 CHM files.
* The offset will be corrected later, once HS1 is read.
*/
- if (read_off64(&offset, &buf[chmhst_OffsetHS0], sys, fh) ||
+ if (read_off64(&offset_hs0, &buf[chmhst_OffsetHS0], sys, fh) ||
read_off64(&chm->dir_offset, &buf[chmhst_OffsetHS1], sys, fh) ||
read_off64(&chm->sec0.offset, &buf[chmhst3_OffsetCS0], sys, fh))
{
return MSPACK_ERR_DATAFORMAT;
}
/* seek to header section 0 */
- if (sys->seek(fh, offset, MSPACK_SYS_SEEK_START)) {
+ if (sys->seek(fh, offset_hs0, MSPACK_SYS_SEEK_START)) {
return MSPACK_ERR_SEEK;
}
/* read header section 0 */
if (sys->read(fh, &buf[0], chmhs0_SIZEOF) != chmhs0_SIZEOF) {
@@ -330,10 +323,22 @@
}
if (read_off64(&chm->length, &buf[chmhs0_FileLen], sys, fh)) {
return MSPACK_ERR_DATAFORMAT;
}
+ /* compare declared CHM file size against actual size */
+ if (!mspack_sys_filelen(sys, fh, &filelen)) {
+ if (chm->length > filelen) {
+ sys->message(fh, "WARNING; file possibly truncated by %" LD " bytes",
+ chm->length - filelen);
+ }
+ else if (chm->length < filelen) {
+ sys->message(fh, "WARNING; possible %" LD " extra bytes at end of file",
+ filelen - chm->length);
+ }
+ }
+
/* seek to header section 1 */
if (sys->seek(fh, chm->dir_offset, MSPACK_SYS_SEEK_START)) {
return MSPACK_ERR_SEEK;
}
@@ -410,16 +415,17 @@
if (!entire) {
return MSPACK_ERR_OK;
}
/* seek to the first PMGL chunk, and reduce the number of chunks to read */
- if ((x = chm->first_pmgl) != 0) {
- if (sys->seek(fh,(off_t) (x * chm->chunk_size), MSPACK_SYS_SEEK_CUR)) {
+ if (chm->first_pmgl != 0) {
+ off_t pmgl_offset = (off_t) chm->first_pmgl * (off_t) chm->chunk_size;
+ if (sys->seek(fh, pmgl_offset, MSPACK_SYS_SEEK_CUR)) {
return MSPACK_ERR_SEEK;
}
}
- num_chunks = chm->last_pmgl - x + 1;
+ num_chunks = chm->last_pmgl - chm->first_pmgl + 1;
if (!(chunk = (unsigned char *) sys->alloc(sys, (size_t)chm->chunk_size))) {
return MSPACK_ERR_NOMEMORY;
}
@@ -447,16 +453,19 @@
p = &chunk[pmgl_Entries];
end = &chunk[chm->chunk_size - 2];
num_entries = EndGetI16(end);
while (num_entries--) {
- READ_ENCINT(name_len);
- if (name_len > (unsigned int) (end - p)) goto chunk_end;
+ unsigned int name_len, section;
+ off_t offset, length;
+ name_len = read_encint(&p, end, &err);
+ if (err || (name_len > (unsigned int) (end - p))) goto encint_err;
name = p; p += name_len;
- READ_ENCINT(section);
- READ_ENCINT(offset);
- READ_ENCINT(length);
+ section = read_encint(&p, end, &err);
+ offset = read_encint(&p, end, &err);
+ length = read_encint(&p, end, &err);
+ if (err) goto encint_err;
/* ignore blank or one-char (e.g. "/") filenames we'd return as blank */
if (name_len < 2 || !name[0] || !name[1]) continue;
/* empty files and directory names are stored as a file entry at
@@ -480,11 +489,11 @@
fi->filename = (char *) &fi[1];
fi->section = ((section == 0) ? (struct mschmd_section *) (&chm->sec0)
: (struct mschmd_section *) (&chm->sec1));
fi->offset = offset;
fi->length = length;
- sys->copy(name, fi->filename, (size_t) name_len);
+ sys->copy((unsigned char *) name, fi->filename, (size_t) name_len);
fi->filename[name_len] = '\0';
if (name[0] == ':' && name[1] == ':') {
/* system file */
if (name_len == 40 && memcmp(name, content_name, 40) == 0) {
@@ -508,14 +517,14 @@
link = fi;
}
}
/* this is reached either when num_entries runs out, or if
- * reading data from the chunk reached a premature end of chunk */
- chunk_end:
+ * an ENCINT is badly encoded */
+ encint_err:
if (num_entries >= 0) {
- D(("chunk ended before all entries could be read"))
+ D(("bad encint before all entries could be read"))
errors++;
}
}
sys->free(chunk);
@@ -570,11 +579,14 @@
if ((result = search_chunk(chm, chunk, filename, &p, &end)) <= 0) {
break;
}
/* found result. loop around for next chunk if this is PMGI */
- if (chunk[3] == 0x4C) break; else READ_ENCINT(n);
+ if (chunk[3] == 0x4C) break;
+
+ n = read_encint(&p, end, &err);
+ if (err) goto encint_err;
}
}
else {
/* PMGL chunks only, search from first_pmgl to last_pmgl */
for (n = chm->first_pmgl; n <= chm->last_pmgl;
@@ -597,25 +609,26 @@
}
}
/* if we found a file, read it */
if (result > 0) {
- READ_ENCINT(sec);
+ sec = read_encint(&p, end, &err);
f_ptr->section = (sec == 0) ? (struct mschmd_section *) &chm->sec0
: (struct mschmd_section *) &chm->sec1;
- READ_ENCINT(f_ptr->offset);
- READ_ENCINT(f_ptr->length);
+ f_ptr->offset = read_encint(&p, end, &err);
+ f_ptr->length = read_encint(&p, end, &err);
+ if (err) goto encint_err;
}
else if (result < 0) {
err = MSPACK_ERR_DATAFORMAT;
}
sys->close(fh);
return self->error = err;
- chunk_end:
- D(("read beyond end of chunk entries"))
+ encint_err:
+ D(("bad encint in PGMI/PGML chunk"))
sys->close(fh);
return self->error = MSPACK_ERR_DATAFORMAT;
}
/* reads the given chunk into memory, storing it in a chunk cache
@@ -695,11 +708,11 @@
const unsigned char **result_end)
{
const unsigned char *start, *end, *p;
unsigned int qr_size, num_entries, qr_entries, qr_density, name_len;
unsigned int L, R, M, fname_len, entries_off, is_pmgl;
- int cmp;
+ int cmp, err = 0;
fname_len = strlen(filename);
/* PMGL chunk or PMGI chunk? (note: read_chunk() has already
* checked the rest of the characters in the chunk signature) */
@@ -753,12 +766,12 @@
/* pick new midpoint */
M = (L + R) >> 1;
/* compare filename with entry QR points to */
p = &chunk[entries_off + (M ? EndGetI16(start - (M << 1)) : 0)];
- READ_ENCINT(name_len);
- if (name_len > (unsigned int) (end - p)) goto chunk_end;
+ name_len = read_encint(&p, end, &err);
+ if (err || (name_len > (unsigned int) (end - p))) goto encint_err;
cmp = compare(filename, (char *)p, fname_len, name_len);
if (cmp == 0) break;
else if (cmp < 0) { if (M) R = M - 1; else return 0; }
else if (cmp > 0) L = M + 1;
@@ -790,12 +803,12 @@
* entry not found (PMGL) / maybe found (PMGI)
* -
*/
*result = NULL;
while (num_entries-- > 0) {
- READ_ENCINT(name_len);
- if (name_len > (unsigned int) (end - p)) goto chunk_end;
+ name_len = read_encint(&p, end, &err);
+ if (err || (name_len > (unsigned int) (end - p))) goto encint_err;
cmp = compare(filename, (char *)p, fname_len, name_len);
p += name_len;
if (cmp == 0) {
/* entry found */
@@ -808,25 +821,25 @@
break;
}
/* read and ignore the rest of this entry */
if (is_pmgl) {
- READ_ENCINT(R); /* skip section */
- READ_ENCINT(R); /* skip offset */
- READ_ENCINT(R); /* skip length */
+ while (p < end && (*p++ & 0x80)); /* skip section ENCINT */
+ while (p < end && (*p++ & 0x80)); /* skip offset ENCINT */
+ while (p < end && (*p++ & 0x80)); /* skip length ENCINT */
}
else {
*result = p; /* store potential final result */
- READ_ENCINT(R); /* skip chunk number */
+ while (p < end && (*p++ & 0x80)); /* skip chunk number ENCINT */
}
}
/* PMGL? not found. PMGI? maybe found */
return (is_pmgl) ? 0 : (*result ? 1 : 0);
- chunk_end:
- D(("reached end of chunk data while searching"))
+ encint_err:
+ D(("bad encint while searching"))
return -1;
}
#if HAVE_TOWLOWER
# include <wctype.h>
@@ -936,18 +949,23 @@
self->error = MSPACK_ERR_OK;
switch (file->section->id) {
case 0: /* Uncompressed section file */
/* simple seek + copy */
- if (sys->seek(self->d->infh, file->section->chm->sec0.offset
- + file->offset, MSPACK_SYS_SEEK_START))
+ if (sys->seek(self->d->infh, chm->sec0.offset + file->offset,
+ MSPACK_SYS_SEEK_START))
{
self->error = MSPACK_ERR_SEEK;
}
else {
unsigned char buf[512];
off_t length = file->length;
+ off_t maxlen = chm->length - sys->tell(self->d->infh);
+ if (length > maxlen) {
+ sys->message(fh, "WARNING; file is %" LD " bytes longer than CHM file",
+ length - maxlen);
+ }
while (length > 0) {
int run = sizeof(buf);
if ((off_t)run > length) run = (int)length;
if (sys->read(self->d->infh, &buf[0], run) != run) {
self->error = MSPACK_ERR_READ;
@@ -961,21 +979,27 @@
}
}
break;
case 1: /* MSCompressed section file */
- /* (re)initialise compression state if we it is not yet initialised,
+ /* (re)initialise compression state if not yet initialised,
* or we have advanced too far and have to backtrack
*/
if (!self->d->state || (file->offset < self->d->offset)) {
if (self->d->state) {
lzxd_free(self->d->state);
self->d->state = NULL;
}
if (chmd_init_decomp(self, file)) break;
}
+ /* check file offset is not impossible */
+ if (file->offset > self->d->length) {
+ self->error = MSPACK_ERR_DECRUNCH;
+ break;
+ }
+
/* seek to input data */
if (sys->seek(self->d->infh, self->d->inoffset, MSPACK_SYS_SEEK_START)) {
self->error = MSPACK_ERR_SEEK;
break;
}
@@ -986,12 +1010,19 @@
self->error = lzxd_decompress(self->d->state, bytes);
}
/* if getting to the correct offset was error free, unpack file */
if (!self->error) {
+ off_t length = file->length;
+ off_t maxlen = self->d->length - file->offset;
+ if (length > maxlen) {
+ sys->message(fh, "WARNING; file is %" LD " bytes longer than "
+ "compressed section", length - maxlen);
+ length = maxlen + 1; /* should decompress but still error out */
+ }
self->d->outfh = fh;
- self->error = lzxd_decompress(self->d->state, file->length);
+ self->error = lzxd_decompress(self->d->state, length);
}
/* save offset in input source stream, in case there is a section 0
* file between now and the next section 1 file extracted */
self->d->inoffset = sys->tell(self->d->infh);
@@ -1050,12 +1081,12 @@
/* ensure we have a ControlData file */
err = find_sys_file(self, sec, &sec->control, control_name);
if (err) return self->error = err;
/* read ControlData */
- if (sec->control->length < lzxcd_SIZEOF) {
- D(("ControlData file is too short"))
+ if (sec->control->length != lzxcd_SIZEOF) {
+ D(("ControlData file is wrong size"))
return self->error = MSPACK_ERR_DATAFORMAT;
}
if (!(data = read_sys_file(self, sec->control))) {
D(("can't read mscompressed control data file"))
return self->error;
@@ -1123,21 +1154,22 @@
/* if we can't read the reset table entry, just start from
* the beginning. Use spaninfo to get the uncompressed length */
entry = 0;
offset = 0;
err = read_spaninfo(self, sec, &length);
+ if (err) return self->error = err;
}
- if (err) return self->error = err;
/* get offset of compressed data stream:
* = offset of uncompressed section from start of file
* + offset of compressed stream from start of uncompressed section
* + offset of chosen reset interval from start of compressed stream */
self->d->inoffset = file->section->chm->sec0.offset + sec->content->offset + offset;
/* set start offset and overall remaining stream length */
self->d->offset = entry * LZX_FRAME_SIZE;
+ self->d->length = length;
length -= self->d->offset;
/* initialise LZX stream */
self->d->state = lzxd_init(&self->d->sys, self->d->infh,
(struct mspack_file *) self, window_bits,
@@ -1170,10 +1202,15 @@
/* read ResetTable file */
if (sec->rtable->length < lzxrt_headerSIZEOF) {
D(("ResetTable file is too short"))
return 0;
}
+ if (sec->rtable->length > 1000000) { /* arbitrary upper limit */
+ D(("ResetTable >1MB (%"LD"), report if genuine", sec->rtable->length))
+ return 0;
+ }
+
if (!(data = read_sys_file(self, sec->rtable))) {
D(("can't read reset table"))
return 0;
}
@@ -1244,10 +1281,16 @@
if (sec->spaninfo->length != 8) {
D(("SpanInfo file is wrong size"))
return MSPACK_ERR_DATAFORMAT;
}
+ /* unconditionally set length here, because gcc -Wuninitialized isn't
+ * clever enough to recognise that read_sys_file() will always set
+ * self->error to a non-zero value if it returns NULL, and gcc warnings
+ * spook humans (even false positives) */
+ *length_ptr = 0;
+
/* read the SpanInfo file */
if (!(data = read_sys_file(self, sec->spaninfo))) {
D(("can't read SpanInfo file"))
return self->error;
}
@@ -1362,16 +1405,53 @@
* are accepted, offsets beyond that cause an error message.
*/
static int read_off64(off_t *var, unsigned char *mem,
struct mspack_system *sys, struct mspack_file *fh)
{
-#if LARGEFILE_SUPPORT
+#if SIZEOF_OFF_T >= 8
*var = EndGetI64(mem);
#else
- *var = EndGetI32(mem);
- if ((*var & 0x80000000) || EndGetI32(mem+4)) {
- sys->message(fh, (char *)largefile_msg);
+ if ((mem[3] & 0x80) | mem[4] | mem[5] | mem[6] | mem[7]) {
+ sys->message(fh, "library not compiled to support large files.");
return 1;
}
+ *var = EndGetI32(mem);
#endif
return 0;
+}
+
+#if SIZEOF_OFF_T >= 8
+ /* 63 bits allowed: 9 * 7 bits/byte, last byte must be 0x00-0x7F */
+# define ENCINT_MAX_BYTES 9
+# define ENCINT_BAD_LAST_BYTE 0x80
+#else
+ /* 31 bits allowed: 5 * 7 bits/byte, last byte must be 0x00-0x07 */
+# define ENCINT_MAX_BYTES 5
+# define ENCINT_BAD_LAST_BYTE 0xF1
+#endif
+
+/***************************************
+ * READ_ENCINT
+ ***************************************
+ * Reads an ENCINT from memory. If running on a system with a 32-bit off_t,
+ * ENCINTs up to 0x7FFFFFFF are accepted, values beyond that are an error.
+ */
+static off_t read_encint(const unsigned char **p, const unsigned char *end,
+ int *err)
+{
+ off_t result = 0;
+ unsigned char c = 0x80;
+ int i = 0;
+ while ((c & 0x80) && (i++ < ENCINT_MAX_BYTES)) {
+ if (*p >= end) {
+ *err = 1;
+ return 0;
+ }
+ c = *(*p)++;
+ result = (result << 7) | (c & 0x7F);
+ }
+ if (i == ENCINT_MAX_BYTES && (c & ENCINT_BAD_LAST_BYTE)) {
+ *err = 1;
+ return 0;
+ }
+ return result;
}