chmd.c in libmspack-0.1.0

- old
+ new

@@ -252,11 +252,11 @@
 /* reads an encoded integer into a variable; 7 bits of data per byte,
  * the high bit is used to indicate that there is another byte */
 #define READ_ENCINT(var) do {			\
     (var) = 0;					\
     do {					\
-	if (p > end) goto chunk_end;		\
+	if (p >= end) goto chunk_end;		\
 	(var) = ((var) << 7) | (*p & 0x7F);	\
     } while (*p++ & 0x80);			\
 } while (0)
 
 static int chmd_read_headers(struct mspack_system *sys, struct mspack_file *fh,
@@ -443,11 +443,13 @@
     p = &chunk[pmgl_Entries];
     end = &chunk[chm->chunk_size - 2];
     num_entries = EndGetI16(end);
 
     while (num_entries--) {
-      READ_ENCINT(name_len); name = p; p += name_len;
+      READ_ENCINT(name_len);
+      if (name_len > (unsigned int) (end - p)) goto chunk_end;
+      name = p; p += name_len;
       READ_ENCINT(section);
       READ_ENCINT(offset);
       READ_ENCINT(length);
 
       /* empty files and directory names are stored as a file entry at
@@ -744,11 +746,11 @@
 	    M = (L + R) >> 1;
 
 	    /* compare filename with entry QR points to */
 	    p = &chunk[entries_off + (M ? EndGetI16(start - (M << 1)) : 0)];
 	    READ_ENCINT(name_len);
-	    if (p + name_len > end) goto chunk_end;
+	    if (name_len > (unsigned int) (end - p)) goto chunk_end;
 	    cmp = compare(filename, (char *)p, fname_len, name_len);
 
 	    if (cmp == 0) break;
 	    else if (cmp < 0) { if (M) R = M - 1; else return 0; }
 	    else if (cmp > 0) L = M + 1;
@@ -781,11 +783,11 @@
      * - 
      */
     *result = NULL;
     while (num_entries-- > 0) {
 	READ_ENCINT(name_len);
-	if (p + name_len > end) goto chunk_end;
+	if (name_len > (unsigned int) (end - p)) goto chunk_end;
 	cmp = compare(filename, (char *)p, fname_len, name_len);
 	p += name_len;
 
 	if (cmp == 0) {
 	    /* entry found */
@@ -848,39 +850,31 @@
     229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,
     248,249,250,251,252,253,254,255
 };
 #endif
 
-/* decodes a UTF-8 character from s[] into c. Will not read past e. */
+/* decodes a UTF-8 character from s[] into c. Will not read past e.
+ * Rejected lead bytes, truncated sequences and values above 0x10FFFF
+ * yield 0xFFFD. Extension bytes are not tested to be %10xxxxxx, and
+ * some overlong encodings are allowed. */
 #define GET_UTF8_CHAR(s, e, c) do {					\
     unsigned char x = *s++;						\
     if (x < 0x80) c = x;						\
-    else if (x < 0xC0) c = -1;						\
-    else if (x < 0xE0) {						\
-	c = (s >= e) ? -1 : ((x & 0x1F) << 6) | (*s++ & 0x3F);		\
+    else if (x >= 0xC2 && x < 0xE0 && s < e) {				\
+	c = (x & 0x1F) << 6 | (*s++ & 0x3F);				\
     }									\
-    else if (x < 0xF0) {						\
-        c = (s+2 > e) ? -1 : ((x & 0x0F) << 12)	| ((s[0] & 0x3F) <<  6)	\
-	    | (s[1] & 0x3F);						\
+    else if (x >= 0xE0 && x < 0xF0 && s+1 < e) {			\
+	c = (x & 0x0F) << 12 | (s[0] & 0x3F) << 6 | (s[1] & 0x3F);	\
 	s += 2;								\
     }									\
-    else if (x < 0xF8) {						\
-	c = (s+3 > e) ? -1 : ((x & 0x07) << 18) | ((s[0] & 0x3F) << 12) \
-	    | ((s[1] & 0x3F) << 6) | (s[2] & 0x3F);			\
+    else if (x >= 0xF0 && x <= 0xF5 && s+2 < e) {			\
+	c = (x & 0x07) << 18 | (s[0] & 0x3F) << 12 |			\
+	    (s[1] & 0x3F) << 6 | (s[2] & 0x3F);				\
+	if (c > 0x10FFFF) c = 0xFFFD;					\
 	s += 3;								\
     }									\
-    else if (x < 0xFC) {						\
-	c = (s+4 > e) ? -1 : ((x & 0x03) << 24) | ((s[0] & 0x3F) << 18) \
-	    | ((s[1] & 0x3F) << 12)|((s[2] & 0x3F) << 6)|(s[3] & 0x3F);	\
-	s += 4;								\
-    }									\
-    else if (x < 0xFE) {						\
-        c = (s+5>e)?-1:((x&1)<<30)|((s[0]&0x3F)<<24)|((s[1]&0x3F)<<18)| \
-	    ((s[2] & 0x3F) << 12) | ((s[3] & 0x3F) << 6)|(s[4] & 0x3F);	\
-	s += 5;								\
-    }									\
-    else c = -1;							\
+    else c = 0xFFFD;							\
 } while (0)
 
 /* case-insensitively compares two UTF8 encoded strings. String length for
  * both strings must be provided, null bytes are not terminators */
 static inline int compare(const char *s1, const char *s2, int l1, int l2) {
@@ -1121,10 +1115,10 @@
     D(("bad controldata window size"))
     return self->error = MSPACK_ERR_DATAFORMAT;
   }
 
   /* validate reset_interval */
-  if (reset_interval % LZX_FRAME_SIZE) {
+  if (reset_interval == 0 || reset_interval % LZX_FRAME_SIZE) {
     D(("bad controldata reset interval"))
     return self->error = MSPACK_ERR_DATAFORMAT;
   }
 
   /* which reset table entry would we like? */