ext/libmspack/mspack/chmd.c in libmspack-0.1.0 vs ext/libmspack/mspack/chmd.c in libmspack-0.10.1

- old
+ new

@@ -1,7 +1,7 @@ /* This file is part of libmspack. - * (C) 2003-2011 Stuart Caie. + * (C) 2003-2018 Stuart Caie. * * libmspack is free software; you can redistribute it and/or modify it under * the terms of the GNU Lesser General Public License (LGPL) version 2.1 * * For further details, see the file COPYING.LIB distributed with libmspack @@ -42,11 +42,11 @@ struct mspack_file *file, void *buffer, int bytes); static int chmd_init_decomp( struct mschm_decompressor_p *self, struct mschmd_file *file); static int read_reset_table( struct mschm_decompressor_p *self, struct mschmd_sec_mscompressed *sec, - int entry, off_t *length_ptr, off_t *offset_ptr); + unsigned int entry, off_t *length_ptr, off_t *offset_ptr); static int read_spaninfo( struct mschm_decompressor_p *self, struct mschmd_sec_mscompressed *sec, off_t *length_ptr); static int find_sys_file( struct mschm_decompressor_p *self, struct mschmd_sec_mscompressed *sec, @@ -119,11 +119,11 @@ *************************************** * opens a file and tries to read it as a CHM file. * Calls chmd_real_open() with entire=1. */ static struct mschmd_header *chmd_open(struct mschm_decompressor *base, - const char *filename) + const char *filename) { return chmd_real_open(base, filename, 1); } /*************************************** @@ -131,11 +131,11 @@ *************************************** * opens a file and tries to read it as a CHM file, but does not read * the file headers. Calls chmd_real_open() with entire=0 */ static struct mschmd_header *chmd_fast_open(struct mschm_decompressor *base, - const char *filename) + const char *filename) { return chmd_real_open(base, filename, 0); } /*************************************** @@ -144,11 +144,11 @@ * the real implementation of chmd_open() and chmd_fast_open(). It simply * passes the "entire" parameter to chmd_read_headers(), which will then * either read all headers, or a bare mininum. */ static struct mschmd_header *chmd_real_open(struct mschm_decompressor *base, - const char *filename, int entire) + const char *filename, int entire) { struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base; struct mschmd_header *chm = NULL; struct mspack_system *sys; struct mspack_file *fh; @@ -160,20 +160,20 @@ if ((fh = sys->open(sys, filename, MSPACK_SYS_OPEN_READ))) { if ((chm = (struct mschmd_header *) sys->alloc(sys, sizeof(struct mschmd_header)))) { chm->filename = filename; error = chmd_read_headers(sys, fh, chm, entire); if (error) { - /* if the error is DATAFORMAT, and there are some results, return - * partial results with a warning, rather than nothing */ - if (error == MSPACK_ERR_DATAFORMAT && (chm->files || chm->sysfiles)) { - sys->message(fh, "WARNING; contents are corrupt"); - error = MSPACK_ERR_OK; - } - else { - chmd_close(base, chm); - chm = NULL; - } + /* if the error is DATAFORMAT, and there are some results, return + * partial results with a warning, rather than nothing */ + if (error == MSPACK_ERR_DATAFORMAT && (chm->files || chm->sysfiles)) { + sys->message(fh, "WARNING; contents are corrupt"); + error = MSPACK_ERR_OK; + } + else { + chmd_close(base, chm); + chm = NULL; + } } self->error = error; } else { self->error = MSPACK_ERR_NOMEMORY; @@ -190,11 +190,11 @@ * CHMD_CLOSE *************************************** * frees all memory associated with a given mschmd_header */ static void chmd_close(struct mschm_decompressor *base, - struct mschmd_header *chm) + struct mschmd_header *chm) { struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base; struct mschmd_file *fi, *nfi; struct mspack_system *sys; unsigned int i; @@ -249,20 +249,20 @@ 0x9E, 0x0C, 0x00, 0xA0, 0xC9, 0x22, 0xE6, 0xEC }; /* reads an encoded integer into a variable; 7 bits of data per byte, * the high bit is used to indicate that there is another byte */ -#define READ_ENCINT(var) do { \ - (var) = 0; \ - do { \ - if (p >= end) goto chunk_end; \ - (var) = ((var) << 7) | (*p & 0x7F); \ - } while (*p++ & 0x80); \ +#define READ_ENCINT(var) do { \ + (var) = 0; \ + do { \ + if (p >= end) goto chunk_end; \ + (var) = ((var) << 7) | (*p & 0x7F); \ + } while (*p++ & 0x80); \ } while (0) static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh, - struct mschmd_header *chm, int entire) + struct mschmd_header *chm, int entire) { unsigned int section, name_len, x, errors, num_chunks; unsigned char buf[0x54], *chunk = NULL, *name, *p, *end; struct mschmd_file *fi, *link = NULL; off_t offset, length; @@ -290,11 +290,11 @@ if (EndGetI32(&buf[chmhead_Signature]) != 0x46535449) { return MSPACK_ERR_SIGNATURE; } /* check both header GUIDs */ - if (mspack_memcmp(&buf[chmhead_GUID1], &guids[0], 32L) != 0) { + if (memcmp(&buf[chmhead_GUID1], &guids[0], 32L) != 0) { D(("incorrect GUIDs")) return MSPACK_ERR_SIGNATURE; } chm->version = EndGetI32(&buf[chmhead_Version]); @@ -378,28 +378,32 @@ * cache is implemented, put arbitrary limits on num_chunks and chunk size. */ if (chm->num_chunks > 100000) { D(("more than 100,000 chunks")) return MSPACK_ERR_DATAFORMAT; - } + } + if (chm->chunk_size > 8192) { + D(("chunk size over 8192 (get in touch if this is valid)")) + return MSPACK_ERR_DATAFORMAT; + } if ((off_t)chm->chunk_size * (off_t)chm->num_chunks > chm->length) { D(("chunks larger than entire file")) return MSPACK_ERR_DATAFORMAT; } /* common sense checks on header section 1 fields */ - if ((chm->chunk_size & (chm->chunk_size - 1)) != 0) { - sys->message(fh, "WARNING; chunk size is not a power of two"); + if (chm->chunk_size != 4096) { + sys->message(fh, "WARNING; chunk size is not 4096"); } if (chm->first_pmgl != 0) { sys->message(fh, "WARNING; first PMGL chunk is not zero"); } if (chm->first_pmgl > chm->last_pmgl) { D(("first pmgl chunk is after last pmgl chunk")) return MSPACK_ERR_DATAFORMAT; } - if (chm->index_root != 0xFFFFFFFF && chm->index_root > chm->num_chunks) { + if (chm->index_root != 0xFFFFFFFF && chm->index_root >= chm->num_chunks) { D(("index_root outside valid range")) return MSPACK_ERR_DATAFORMAT; } /* if we are doing a quick read, stop here! */ @@ -433,11 +437,11 @@ if (EndGetI32(&chunk[pmgl_QuickRefSize]) < 2) { sys->message(fh, "WARNING; PMGL quickref area is too small"); } if (EndGetI32(&chunk[pmgl_QuickRefSize]) > - ((int)chm->chunk_size - pmgl_Entries)) + (chm->chunk_size - pmgl_Entries)) { sys->message(fh, "WARNING; PMGL quickref area is too large"); } p = &chunk[pmgl_Entries]; @@ -450,59 +454,60 @@ name = p; p += name_len; READ_ENCINT(section); READ_ENCINT(offset); READ_ENCINT(length); + /* ignore blank or one-char (e.g. "/") filenames we'd return as blank */ + if (name_len < 2 || !name[0] || !name[1]) continue; + /* empty files and directory names are stored as a file entry at * offset 0 with length 0. We want to keep empty files, but not * directory names, which end with a "/" */ if ((offset == 0) && (length == 0)) { - if ((name_len > 0) && (name[name_len-1] == '/')) continue; + if ((name_len > 0) && (name[name_len-1] == '/')) continue; } if (section > 1) { - sys->message(fh, "invalid section number '%u'.", section); - continue; + sys->message(fh, "invalid section number '%u'.", section); + continue; } if (!(fi = (struct mschmd_file *) sys->alloc(sys, sizeof(struct mschmd_file) + name_len + 1))) { - sys->free(chunk); - return MSPACK_ERR_NOMEMORY; + sys->free(chunk); + return MSPACK_ERR_NOMEMORY; } fi->next = NULL; fi->filename = (char *) &fi[1]; fi->section = ((section == 0) ? (struct mschmd_section *) (&chm->sec0) - : (struct mschmd_section *) (&chm->sec1)); + : (struct mschmd_section *) (&chm->sec1)); fi->offset = offset; fi->length = length; sys->copy(name, fi->filename, (size_t) name_len); fi->filename[name_len] = '\0'; if (name[0] == ':' && name[1] == ':') { - /* system file */ - if (mspack_memcmp(&name[2], &content_name[2], 31L) == 0) { - if (mspack_memcmp(&name[33], &content_name[33], 8L) == 0) { - chm->sec1.content = fi; - } - else if (mspack_memcmp(&name[33], &control_name[33], 11L) == 0) { - chm->sec1.control = fi; - } - else if (mspack_memcmp(&name[33], &spaninfo_name[33], 8L) == 0) { - chm->sec1.spaninfo = fi; - } - else if (mspack_memcmp(&name[33], &rtable_name[33], 72L) == 0) { - chm->sec1.rtable = fi; - } - } - fi->next = chm->sysfiles; - chm->sysfiles = fi; + /* system file */ + if (name_len == 40 && memcmp(name, content_name, 40) == 0) { + chm->sec1.content = fi; + } + else if (name_len == 44 && memcmp(name, control_name, 44) == 0) { + chm->sec1.control = fi; + } + else if (name_len == 41 && memcmp(name, spaninfo_name, 41) == 0) { + chm->sec1.spaninfo = fi; + } + else if (name_len == 105 && memcmp(name, rtable_name, 105) == 0) { + chm->sec1.rtable = fi; + } + fi->next = chm->sysfiles; + chm->sysfiles = fi; } else { - /* normal file */ - if (link) link->next = fi; else chm->files = fi; - link = fi; + /* normal file */ + if (link) link->next = fi; else chm->files = fi; + link = fi; } } /* this is reached either when num_entries runs out, or if * reading data from the chunk reached a premature end of chunk */ @@ -522,85 +527,88 @@ *************************************** * uses PMGI index chunks and quickref data to quickly locate a file * directly from the on-disk index. * * TODO: protect against infinite loops in chunks (where pgml_NextChunk - * or a PGMI index entry point to an already visited chunk) + * or a PMGI index entry point to an already visited chunk) */ static int chmd_fast_find(struct mschm_decompressor *base, - struct mschmd_header *chm, const char *filename, - struct mschmd_file *f_ptr, int f_size) + struct mschmd_header *chm, const char *filename, + struct mschmd_file *f_ptr, int f_size) { struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base; struct mspack_system *sys; struct mspack_file *fh; - const unsigned char *chunk, *p, *end; + /* p and end are initialised to prevent MSVC warning about "potentially" + * uninitialised usage. This is provably untrue, but MS won't fix: + * https://developercommunity.visualstudio.com/content/problem/363489/c4701-false-positive-warning.html */ + const unsigned char *chunk, *p = NULL, *end = NULL; int err = MSPACK_ERR_OK, result = -1; unsigned int n, sec; if (!self || !chm || !f_ptr || (f_size != sizeof(struct mschmd_file))) { - return MSPACK_ERR_ARGS; + return MSPACK_ERR_ARGS; } sys = self->system; /* clear the results structure */ memset(f_ptr, 0, f_size); if (!(fh = sys->open(sys, chm->filename, MSPACK_SYS_OPEN_READ))) { - return MSPACK_ERR_OPEN; + return MSPACK_ERR_OPEN; } /* go through PMGI chunk hierarchy to reach PMGL chunk */ if (chm->index_root < chm->num_chunks) { - n = chm->index_root; - for (;;) { - if (!(chunk = read_chunk(self, chm, fh, n))) { - sys->close(fh); - return self->error; - } + n = chm->index_root; + for (;;) { + if (!(chunk = read_chunk(self, chm, fh, n))) { + sys->close(fh); + return self->error; + } - /* search PMGI/PMGL chunk. exit early if no entry found */ - if ((result = search_chunk(chm, chunk, filename, &p, &end)) <= 0) { - break; - } + /* search PMGI/PMGL chunk. exit early if no entry found */ + if ((result = search_chunk(chm, chunk, filename, &p, &end)) <= 0) { + break; + } - /* found result. loop around for next chunk if this is PMGI */ - if (chunk[3] == 0x4C) break; else READ_ENCINT(n); - } + /* found result. loop around for next chunk if this is PMGI */ + if (chunk[3] == 0x4C) break; else READ_ENCINT(n); + } } else { - /* PMGL chunks only, search from first_pmgl to last_pmgl */ - for (n = chm->first_pmgl; n <= chm->last_pmgl; - n = EndGetI32(&chunk[pmgl_NextChunk])) - { - if (!(chunk = read_chunk(self, chm, fh, n))) { - err = self->error; - break; - } + /* PMGL chunks only, search from first_pmgl to last_pmgl */ + for (n = chm->first_pmgl; n <= chm->last_pmgl; + n = EndGetI32(&chunk[pmgl_NextChunk])) + { + if (!(chunk = read_chunk(self, chm, fh, n))) { + err = self->error; + break; + } - /* search PMGL chunk. exit if file found */ - if ((result = search_chunk(chm, chunk, filename, &p, &end)) > 0) { - break; - } + /* search PMGL chunk. exit if file found */ + if ((result = search_chunk(chm, chunk, filename, &p, &end)) > 0) { + break; + } /* stop simple infinite loops: can't visit the same chunk twice */ - if ((int)n == EndGetI32(&chunk[pmgl_NextChunk])) { + if (n == EndGetI32(&chunk[pmgl_NextChunk])) { break; } - } + } } /* if we found a file, read it */ if (result > 0) { - READ_ENCINT(sec); - f_ptr->section = (sec == 0) ? (struct mschmd_section *) &chm->sec0 - : (struct mschmd_section *) &chm->sec1; - READ_ENCINT(f_ptr->offset); - READ_ENCINT(f_ptr->length); + READ_ENCINT(sec); + f_ptr->section = (sec == 0) ? (struct mschmd_section *) &chm->sec0 + : (struct mschmd_section *) &chm->sec1; + READ_ENCINT(f_ptr->offset); + READ_ENCINT(f_ptr->length); } else if (result < 0) { - err = MSPACK_ERR_DATAFORMAT; + err = MSPACK_ERR_DATAFORMAT; } sys->close(fh); return self->error = err; @@ -612,60 +620,60 @@ /* reads the given chunk into memory, storing it in a chunk cache * so it doesn't need to be read from disk more than once */ static unsigned char *read_chunk(struct mschm_decompressor_p *self, - struct mschmd_header *chm, - struct mspack_file *fh, - unsigned int chunk_num) + struct mschmd_header *chm, + struct mspack_file *fh, + unsigned int chunk_num) { struct mspack_system *sys = self->system; unsigned char *buf; /* check arguments - most are already checked by chmd_fast_find */ - if (chunk_num > chm->num_chunks) return NULL; + if (chunk_num >= chm->num_chunks) return NULL; /* ensure chunk cache is available */ if (!chm->chunk_cache) { - size_t size = sizeof(unsigned char *) * chm->num_chunks; - if (!(chm->chunk_cache = (unsigned char **) sys->alloc(sys, size))) { - self->error = MSPACK_ERR_NOMEMORY; - return NULL; - } - memset(chm->chunk_cache, 0, size); + size_t size = sizeof(unsigned char *) * chm->num_chunks; + if (!(chm->chunk_cache = (unsigned char **) sys->alloc(sys, size))) { + self->error = MSPACK_ERR_NOMEMORY; + return NULL; + } + memset(chm->chunk_cache, 0, size); } /* try to answer out of chunk cache */ if (chm->chunk_cache[chunk_num]) return chm->chunk_cache[chunk_num]; /* need to read chunk - allocate memory for it */ if (!(buf = (unsigned char *) sys->alloc(sys, chm->chunk_size))) { - self->error = MSPACK_ERR_NOMEMORY; - return NULL; + self->error = MSPACK_ERR_NOMEMORY; + return NULL; } /* seek to block and read it */ if (sys->seek(fh, (off_t) (chm->dir_offset + (chunk_num * chm->chunk_size)), - MSPACK_SYS_SEEK_START)) + MSPACK_SYS_SEEK_START)) { - self->error = MSPACK_ERR_SEEK; - sys->free(buf); - return NULL; + self->error = MSPACK_ERR_SEEK; + sys->free(buf); + return NULL; } if (sys->read(fh, buf, (int)chm->chunk_size) != (int)chm->chunk_size) { - self->error = MSPACK_ERR_READ; - sys->free(buf); - return NULL; + self->error = MSPACK_ERR_READ; + sys->free(buf); + return NULL; } /* check the signature. Is is PMGL or PMGI? */ if (!((buf[0] == 0x50) && (buf[1] == 0x4D) && (buf[2] == 0x47) && - ((buf[3] == 0x4C) || (buf[3] == 0x49)))) + ((buf[3] == 0x4C) || (buf[3] == 0x49)))) { - self->error = MSPACK_ERR_SEEK; - sys->free(buf); - return NULL; + self->error = MSPACK_ERR_SEEK; + sys->free(buf); + return NULL; } /* all OK. Store chunk in cache and return it */ return chm->chunk_cache[chunk_num] = buf; } @@ -679,14 +687,14 @@ * In the case of PMGL chunks, the entry has definitely been * found. In the case of PMGI chunks, the entry which points to the * chunk that may eventually contain that entry has been found. */ static int search_chunk(struct mschmd_header *chm, - const unsigned char *chunk, - const char *filename, - const unsigned char **result, - const unsigned char **result_end) + const unsigned char *chunk, + const char *filename, + const unsigned char **result, + const unsigned char **result_end) { const unsigned char *start, *end, *p; unsigned int qr_size, num_entries, qr_entries, qr_density, name_len; unsigned int L, R, M, fname_len, entries_off, is_pmgl; int cmp; @@ -694,16 +702,16 @@ fname_len = strlen(filename); /* PMGL chunk or PMGI chunk? (note: read_chunk() has already * checked the rest of the characters in the chunk signature) */ if (chunk[3] == 0x4C) { - is_pmgl = 1; - entries_off = pmgl_Entries; + is_pmgl = 1; + entries_off = pmgl_Entries; } else { - is_pmgl = 0; - entries_off = pmgi_Entries; + is_pmgl = 0; + entries_off = pmgi_Entries; } /* Step 1: binary search first filename of each QR entry * - target filename == entry * found file @@ -720,59 +728,59 @@ num_entries = EndGetI16(start); qr_density = 1 + (1 << chm->density); qr_entries = (num_entries + qr_density-1) / qr_density; if (num_entries == 0) { - D(("chunk has no entries")) - return -1; + D(("chunk has no entries")) + return -1; } if (qr_size > chm->chunk_size) { - D(("quickref size > chunk size")) - return -1; + D(("quickref size > chunk size")) + return -1; } *result_end = end; if (((int)qr_entries * 2) > (start - end)) { - D(("WARNING; more quickrefs than quickref space")) - qr_entries = 0; /* but we can live with it */ + D(("WARNING; more quickrefs than quickref space")) + qr_entries = 0; /* but we can live with it */ } if (qr_entries > 0) { - L = 0; - R = qr_entries - 1; - do { - /* pick new midpoint */ - M = (L + R) >> 1; + L = 0; + R = qr_entries - 1; + do { + /* pick new midpoint */ + M = (L + R) >> 1; - /* compare filename with entry QR points to */ - p = &chunk[entries_off + (M ? EndGetI16(start - (M << 1)) : 0)]; - READ_ENCINT(name_len); - if (name_len > (unsigned int) (end - p)) goto chunk_end; - cmp = compare(filename, (char *)p, fname_len, name_len); + /* compare filename with entry QR points to */ + p = &chunk[entries_off + (M ? EndGetI16(start - (M << 1)) : 0)]; + READ_ENCINT(name_len); + if (name_len > (unsigned int) (end - p)) goto chunk_end; + cmp = compare(filename, (char *)p, fname_len, name_len); - if (cmp == 0) break; - else if (cmp < 0) { if (M) R = M - 1; else return 0; } - else if (cmp > 0) L = M + 1; - } while (L <= R); - M = (L + R) >> 1; + if (cmp == 0) break; + else if (cmp < 0) { if (M) R = M - 1; else return 0; } + else if (cmp > 0) L = M + 1; + } while (L <= R); + M = (L + R) >> 1; - if (cmp == 0) { - /* exact match! */ - p += name_len; - *result = p; - return 1; - } + if (cmp == 0) { + /* exact match! */ + p += name_len; + *result = p; + return 1; + } - /* otherwise, read the group of entries for QR entry M */ - p = &chunk[entries_off + (M ? EndGetI16(start - (M << 1)) : 0)]; - num_entries -= (M * qr_density); - if (num_entries > qr_density) num_entries = qr_density; + /* otherwise, read the group of entries for QR entry M */ + p = &chunk[entries_off + (M ? EndGetI16(start - (M << 1)) : 0)]; + num_entries -= (M * qr_density); + if (num_entries > qr_density) num_entries = qr_density; } else { - p = &chunk[entries_off]; + p = &chunk[entries_off]; } /* Step 2: linear search through the set of entries reached in step 1. * - filename == any entry * found entry @@ -782,36 +790,36 @@ * entry not found (PMGL) / maybe found (PMGI) * - */ *result = NULL; while (num_entries-- > 0) { - READ_ENCINT(name_len); - if (name_len > (unsigned int) (end - p)) goto chunk_end; - cmp = compare(filename, (char *)p, fname_len, name_len); - p += name_len; + READ_ENCINT(name_len); + if (name_len > (unsigned int) (end - p)) goto chunk_end; + cmp = compare(filename, (char *)p, fname_len, name_len); + p += name_len; - if (cmp == 0) { - /* entry found */ - *result = p; - return 1; - } + if (cmp == 0) { + /* entry found */ + *result = p; + return 1; + } - if (cmp < 0) { - /* entry not found (PMGL) / maybe found (PMGI) */ - break; - } + if (cmp < 0) { + /* entry not found (PMGL) / maybe found (PMGI) */ + break; + } - /* read and ignore the rest of this entry */ - if (is_pmgl) { - READ_ENCINT(R); /* skip section */ - READ_ENCINT(R); /* skip offset */ - READ_ENCINT(R); /* skip length */ - } - else { - *result = p; /* store potential final result */ - READ_ENCINT(R); /* skip chunk number */ - } + /* read and ignore the rest of this entry */ + if (is_pmgl) { + READ_ENCINT(R); /* skip section */ + READ_ENCINT(R); /* skip offset */ + READ_ENCINT(R); /* skip length */ + } + else { + *result = p; /* store potential final result */ + READ_ENCINT(R); /* skip chunk number */ + } } /* PMGL? not found. PMGI? maybe found */ return (is_pmgl) ? 0 : (*result ? 1 : 0); @@ -819,62 +827,38 @@ D(("reached end of chunk data while searching")) return -1; } #if HAVE_TOWLOWER -# if HAVE_WCTYPE_H -# include <wctype.h> -# endif +# include <wctype.h> # define TOLOWER(x) towlower(x) -#elif HAVE_TOLOWER -# if HAVE_CTYPE_H -# include <ctype.h> -# endif -# define TOLOWER(x) tolower(x) #else -# define TOLOWER(x) (((x)<0||(x)>256)?(x):mspack_tolower_map[(x)]) -/* Map of char -> lowercase char for the first 256 chars. Generated with: - * LC_CTYPE=en_GB.utf-8 perl -Mlocale -le 'print map{ord(lc chr).","} 0..255' - */ -static const unsigned char mspack_tolower_map[256] = { - 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27, - 28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52, - 53,54,55,56,57,58,59,60,61,62,63,64,97,98,99,100,101,102,103,104,105,106, - 107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,91,92,93,94, - 95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114, - 115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133, - 134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152, - 153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171, - 172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190, - 191,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241, - 242,243,244,245,246,215,248,249,250,251,252,253,254,223,224,225,226,227,228, - 229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247, - 248,249,250,251,252,253,254,255 -}; +# include <ctype.h> +# define TOLOWER(x) tolower(x) #endif /* decodes a UTF-8 character from s[] into c. Will not read past e. * doesn't test that extension bytes are %10xxxxxx. * allows some overlong encodings. */ -#define GET_UTF8_CHAR(s, e, c) do { \ - unsigned char x = *s++; \ - if (x < 0x80) c = x; \ - else if (x >= 0xC2 && x < 0xE0 && s < e) { \ - c = (x & 0x1F) << 6 | (*s++ & 0x3F); \ - } \ - else if (x >= 0xE0 && x < 0xF0 && s+1 < e) { \ - c = (x & 0x0F) << 12 | (s[0] & 0x3F) << 6 | (s[1] & 0x3F); \ - s += 2; \ - } \ - else if (x >= 0xF0 && x <= 0xF5 && s+2 < e) { \ - c = (x & 0x07) << 18 | (s[0] & 0x3F) << 12 | \ - (s[1] & 0x3F) << 6 | (s[2] & 0x3F); \ - if (c > 0x10FFFF) c = 0xFFFD; \ - s += 3; \ - } \ - else c = 0xFFFD; \ +#define GET_UTF8_CHAR(s, e, c) do { \ + unsigned char x = *s++; \ + if (x < 0x80) c = x; \ + else if (x >= 0xC2 && x < 0xE0 && s < e) { \ + c = (x & 0x1F) << 6 | (*s++ & 0x3F); \ + } \ + else if (x >= 0xE0 && x < 0xF0 && s+1 < e) { \ + c = (x & 0x0F) << 12 | (s[0] & 0x3F) << 6 | (s[1] & 0x3F); \ + s += 2; \ + } \ + else if (x >= 0xF0 && x <= 0xF5 && s+2 < e) { \ + c = (x & 0x07) << 18 | (s[0] & 0x3F) << 12 | \ + (s[1] & 0x3F) << 6 | (s[2] & 0x3F); \ + if (c > 0x10FFFF) c = 0xFFFD; \ + s += 3; \ + } \ + else c = 0xFFFD; \ } while (0) /* case-insensitively compares two UTF8 encoded strings. String length for * both strings must be provided, null bytes are not terminators */ static inline int compare(const char *s1, const char *s2, int l1, int l2) { @@ -882,16 +866,16 @@ register const unsigned char *p2 = (const unsigned char *) s2; register const unsigned char *e1 = p1 + l1, *e2 = p2 + l2; int c1, c2; while (p1 < e1 && p2 < e2) { - GET_UTF8_CHAR(p1, e1, c1); - GET_UTF8_CHAR(p2, e2, c2); - if (c1 == c2) continue; - c1 = TOLOWER(c1); - c2 = TOLOWER(c2); - if (c1 != c2) return c1 - c2; + GET_UTF8_CHAR(p1, e1, c1); + GET_UTF8_CHAR(p2, e2, c2); + if (c1 == c2) continue; + c1 = TOLOWER(c1); + c2 = TOLOWER(c2); + if (c1 != c2) return c1 - c2; } return l1 - l2; } @@ -899,11 +883,11 @@ * CHMD_EXTRACT *************************************** * extracts a file from a CHM helpfile */ static int chmd_extract(struct mschm_decompressor *base, - struct mschmd_file *file, const char *filename) + struct mschmd_file *file, const char *filename) { struct mschm_decompressor_p *self = (struct mschm_decompressor_p *) base; struct mspack_system *sys; struct mschmd_header *chm; struct mspack_file *fh; @@ -953,41 +937,41 @@ switch (file->section->id) { case 0: /* Uncompressed section file */ /* simple seek + copy */ if (sys->seek(self->d->infh, file->section->chm->sec0.offset - + file->offset, MSPACK_SYS_SEEK_START)) + + file->offset, MSPACK_SYS_SEEK_START)) { self->error = MSPACK_ERR_SEEK; } else { unsigned char buf[512]; off_t length = file->length; while (length > 0) { - int run = sizeof(buf); - if ((off_t)run > length) run = (int)length; - if (sys->read(self->d->infh, &buf[0], run) != run) { - self->error = MSPACK_ERR_READ; - break; - } - if (sys->write(fh, &buf[0], run) != run) { - self->error = MSPACK_ERR_WRITE; - break; - } - length -= run; + int run = sizeof(buf); + if ((off_t)run > length) run = (int)length; + if (sys->read(self->d->infh, &buf[0], run) != run) { + self->error = MSPACK_ERR_READ; + break; + } + if (sys->write(fh, &buf[0], run) != run) { + self->error = MSPACK_ERR_WRITE; + break; + } + length -= run; } } break; case 1: /* MSCompressed section file */ /* (re)initialise compression state if we it is not yet initialised, * or we have advanced too far and have to backtrack */ if (!self->d->state || (file->offset < self->d->offset)) { if (self->d->state) { - lzxd_free(self->d->state); - self->d->state = NULL; + lzxd_free(self->d->state); + self->d->state = NULL; } if (chmd_init_decomp(self, file)) break; } /* seek to input data */ @@ -1047,11 +1031,11 @@ * Initialises the LZX decompressor to decompress the compressed stream, * from the nearest reset offset and length that is needed for the given * file. */ static int chmd_init_decomp(struct mschm_decompressor_p *self, - struct mschmd_file *file) + struct mschmd_file *file) { int window_size, window_bits, reset_interval, entry, err; struct mspack_system *sys = self->system; struct mschmd_sec_mscompressed *sec; unsigned char *data; @@ -1154,13 +1138,13 @@ self->d->offset = entry * LZX_FRAME_SIZE; length -= self->d->offset; /* initialise LZX stream */ self->d->state = lzxd_init(&self->d->sys, self->d->infh, - (struct mspack_file *) self, window_bits, - reset_interval / LZX_FRAME_SIZE, - 4096, length, 0); + (struct mspack_file *) self, window_bits, + reset_interval / LZX_FRAME_SIZE, + 4096, length, 0); if (!self->d->state) self->error = MSPACK_ERR_NOMEMORY; return self->error; } /*************************************** @@ -1169,12 +1153,13 @@ * Reads one entry out of the reset table. Also reads the uncompressed * data length. Writes these to offset_ptr and length_ptr respectively. * Returns non-zero for success, zero for failure. */ static int read_reset_table(struct mschm_decompressor_p *self, - struct mschmd_sec_mscompressed *sec, - int entry, off_t *length_ptr, off_t *offset_ptr) + struct mschmd_sec_mscompressed *sec, + unsigned int entry, + off_t *length_ptr, off_t *offset_ptr) { struct mspack_system *sys = self->system; unsigned char *data; unsigned int pos, entrysize; @@ -1182,55 +1167,55 @@ int err = find_sys_file(self, sec, &sec->rtable, rtable_name); if (err) return 0; /* read ResetTable file */ if (sec->rtable->length < lzxrt_headerSIZEOF) { - D(("ResetTable file is too short")) - return 0; + D(("ResetTable file is too short")) + return 0; } if (!(data = read_sys_file(self, sec->rtable))) { - D(("can't read reset table")) - return 0; + D(("can't read reset table")) + return 0; } /* check sanity of reset table */ if (EndGetI32(&data[lzxrt_FrameLen]) != LZX_FRAME_SIZE) { - D(("bad reset table frame length")) - sys->free(data); - return 0; + D(("bad reset table frame length")) + sys->free(data); + return 0; } /* get the uncompressed length of the LZX stream */ if (read_off64(length_ptr, &data[lzxrt_UncompLen], sys, self->d->infh)) { - sys->free(data); - return 0; + sys->free(data); + return 0; } entrysize = EndGetI32(&data[lzxrt_EntrySize]); pos = EndGetI32(&data[lzxrt_TableOffset]) + (entry * entrysize); /* ensure reset table entry for this offset exists */ if (entry < EndGetI32(&data[lzxrt_NumEntries]) && - pos <= (sec->rtable->length - entrysize)) + pos <= (sec->rtable->length - entrysize)) { - switch (entrysize) { - case 4: - *offset_ptr = EndGetI32(&data[pos]); - err = 0; - break; - case 8: - err = read_off64(offset_ptr, &data[pos], sys, self->d->infh); - break; - default: - D(("reset table entry size neither 4 nor 8")) - err = 1; - break; - } + switch (entrysize) { + case 4: + *offset_ptr = EndGetI32(&data[pos]); + err = 0; + break; + case 8: + err = read_off64(offset_ptr, &data[pos], sys, self->d->infh); + break; + default: + D(("reset table entry size neither 4 nor 8")) + err = 1; + break; + } } else { - D(("bad reset interval")) - err = 1; + D(("bad reset interval")) + err = 1; } /* free the reset table */ sys->free(data); @@ -1243,66 +1228,72 @@ *************************************** * Reads the uncompressed data length from the spaninfo file. * Returns zero for success or a non-zero error code for failure. */ static int read_spaninfo(struct mschm_decompressor_p *self, - struct mschmd_sec_mscompressed *sec, - off_t *length_ptr) + struct mschmd_sec_mscompressed *sec, + off_t *length_ptr) { struct mspack_system *sys = self->system; unsigned char *data; /* find SpanInfo file */ int err = find_sys_file(self, sec, &sec->spaninfo, spaninfo_name); if (err) return MSPACK_ERR_DATAFORMAT; /* check it's large enough */ if (sec->spaninfo->length != 8) { - D(("SpanInfo file is wrong size")) - return MSPACK_ERR_DATAFORMAT; + D(("SpanInfo file is wrong size")) + return MSPACK_ERR_DATAFORMAT; } /* read the SpanInfo file */ if (!(data = read_sys_file(self, sec->spaninfo))) { - D(("can't read SpanInfo file")) - return self->error; + D(("can't read SpanInfo file")) + return self->error; } /* get the uncompressed length of the LZX stream */ err = read_off64(length_ptr, data, sys, self->d->infh); - sys->free(data); - return (err) ? MSPACK_ERR_DATAFORMAT : MSPACK_ERR_OK; + if (err) return MSPACK_ERR_DATAFORMAT; + + if (*length_ptr <= 0) { + D(("output length is invalid")) + return MSPACK_ERR_DATAFORMAT; + } + + return MSPACK_ERR_OK; } /*************************************** * FIND_SYS_FILE *************************************** * Uses chmd_fast_find to locate a system file, and fills out that system * file's entry and links it into the list of system files. Returns zero * for success, non-zero for both failure and the file not existing. */ static int find_sys_file(struct mschm_decompressor_p *self, - struct mschmd_sec_mscompressed *sec, - struct mschmd_file **f_ptr, const char *name) + struct mschmd_sec_mscompressed *sec, + struct mschmd_file **f_ptr, const char *name) { struct mspack_system *sys = self->system; struct mschmd_file result; /* already loaded */ if (*f_ptr) return MSPACK_ERR_OK; /* try using fast_find to find the file - return DATAFORMAT error if * it fails, or successfully doesn't find the file */ if (chmd_fast_find((struct mschm_decompressor *) self, sec->base.chm, - name, &result, (int)sizeof(result)) || !result.section) + name, &result, (int)sizeof(result)) || !result.section) { - return MSPACK_ERR_DATAFORMAT; + return MSPACK_ERR_DATAFORMAT; } if (!(*f_ptr = (struct mschmd_file *) sys->alloc(sys, sizeof(result)))) { - return MSPACK_ERR_NOMEMORY; + return MSPACK_ERR_NOMEMORY; } /* copy result */ *(*f_ptr) = result; (*f_ptr)->filename = (char *) name; @@ -1318,11 +1309,11 @@ *************************************** * Allocates memory for a section 0 (uncompressed) file and reads it into * memory. */ static unsigned char *read_sys_file(struct mschm_decompressor_p *self, - struct mschmd_file *file) + struct mschmd_file *file) { struct mspack_system *sys = self->system; unsigned char *data = NULL; int len; @@ -1336,11 +1327,11 @@ if (!(data = (unsigned char *) sys->alloc(sys, (size_t) len))) { self->error = MSPACK_ERR_NOMEMORY; return NULL; } if (sys->seek(self->d->infh, file->section->chm->sec0.offset - + file->offset, MSPACK_SYS_SEEK_START)) + + file->offset, MSPACK_SYS_SEEK_START)) { self->error = MSPACK_ERR_SEEK; sys->free(data); return NULL; } @@ -1369,18 +1360,18 @@ * If running on a system with a 64-bit off_t, this is simply done. * If running on a system with a 32-bit off_t, offsets up to 0x7FFFFFFF * are accepted, offsets beyond that cause an error message. */ static int read_off64(off_t *var, unsigned char *mem, - struct mspack_system *sys, struct mspack_file *fh) + struct mspack_system *sys, struct mspack_file *fh) { -#ifdef LARGEFILE_SUPPORT +#if LARGEFILE_SUPPORT *var = EndGetI64(mem); #else *var = EndGetI32(mem); if ((*var & 0x80000000) || EndGetI32(mem+4)) { - sys->message(fh, (char *)largefile_msg); - return 1; + sys->message(fh, (char *)largefile_msg); + return 1; } #endif return 0; }