ext/ox/dump.c in ox-2.2.4 vs ext/ox/dump.c in ox-2.3.0

- old
+ new

@@ -76,12 +76,14 @@ static int is_xml_friendly(const uchar *str, int len); static const char hex_chars[17] = "0123456789abcdef"; +// The : character is equivalent to 10. Used for replacement characters up to 10 +// characters long such as '&#x10FFFF;'. static char xml_friendly_chars[257] = "\ -88888888811881888888888888888888\ +:::::::::11::1::::::::::::::::::\ 11611156111111111111111111114141\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ 11111111111111111111111111111111\ @@ -313,54 +315,75 @@ } inline static void dump_str_value(Out out, const char *value, size_t size) { size_t xsize = xml_str_len((const uchar*)value, size); - + if (out->end - out->cur <= (long)xsize) { grow(out, xsize); } for (; '\0' != *value; value++) { if ('1' == xml_friendly_chars[(uchar)*value]) { *out->cur++ = *value; } else { - *out->cur++ = '&'; switch (*value) { case '"': + *out->cur++ = '&'; *out->cur++ = 'q'; *out->cur++ = 'u'; *out->cur++ = 'o'; *out->cur++ = 't'; + *out->cur++ = ';'; break; case '&': + *out->cur++ = '&'; *out->cur++ = 'a'; *out->cur++ = 'm'; *out->cur++ = 'p'; + *out->cur++ = ';'; break; case '\'': + *out->cur++ = '&'; *out->cur++ = 'a'; *out->cur++ = 'p'; *out->cur++ = 'o'; *out->cur++ = 's'; + *out->cur++ = ';'; break; case '<': + *out->cur++ = '&'; *out->cur++ = 'l'; *out->cur++ = 't'; + *out->cur++ = ';'; break; case '>': + *out->cur++ = '&'; *out->cur++ = 'g'; *out->cur++ = 't'; + *out->cur++ = ';'; break; default: - *out->cur++ = '#'; - *out->cur++ = 'x'; - *out->cur++ = '0'; - *out->cur++ = '0'; - dump_hex(*value, out); + // Must be one of the invalid characters. + if (StrictEffort == out->opts->effort) { + rb_raise(rb_eSyntaxError, "'\\#x%02x' is not a valid XML character.", *value); + } + if (Yes == out->opts->allow_invalid) { + *out->cur++ = '&'; + *out->cur++ = '#'; + *out->cur++ = 'x'; + *out->cur++ = '0'; + *out->cur++ = '0'; + dump_hex(*value, out); + *out->cur++ = ';'; + } else if ('\0' != *out->opts->inv_repl) { + // If the empty string then ignore. The first character of + // the replacement is the length. + memcpy(out->cur, out->opts->inv_repl + 1, (size_t)*out->opts->inv_repl); + out->cur += *out->opts->inv_repl; + } break; } - *out->cur++ = ';'; } } *out->cur = '\0'; } @@ -1014,11 +1037,11 @@ *out->cur++ = '<'; fill_value(out, name, nlen); if (Qnil != attrs) { rb_hash_foreach(attrs, dump_gen_attr, (VALUE)out); } - if (Qnil != nodes) { + if (Qnil != nodes && 0 < RARRAY_LEN(nodes)) { int do_indent; *out->cur++ = '>'; do_indent = dump_gen_nodes(nodes, depth, out); if (out->end - out->cur <= (long)size) { @@ -1197,9 +1220,10 @@ out->obj = obj; if (Yes == copts->circular) { ox_cache8_new(&out->circ_cache); } out->indent = copts->indent; + if (ox_document_clas == clas) { dump_gen_doc(obj, -1, out); } else if (ox_element_clas == clas) { dump_gen_element(obj, 0, out); } else {