ext/ox/dump.c in ox-2.2.4 vs ext/ox/dump.c in ox-2.3.0
- old
+ new
@@ -76,12 +76,14 @@
// Forward declaration; the definition lies outside this hunk.
static int is_xml_friendly(const uchar *str, int len);
// The sixteen lowercase hexadecimal digits; the 17th array element holds the
// terminating NUL of the string literal.
static const char hex_chars[17] = "0123456789abcdef";
+// The : character is equivalent to 10. Used for replacement characters up to 10
+// characters long such as '&#x10FFFF;'.
static char xml_friendly_chars[257] = "\
-88888888811881888888888888888888\
+:::::::::11::1::::::::::::::::::\
11611156111111111111111111114141\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111\
11111111111111111111111111111111\
@@ -313,54 +315,75 @@
}
// Appends value to the output buffer, escaping characters that are not flagged
// '1' in xml_friendly_chars, and NUL-terminates the output without advancing
// past the terminator.
//
// out   - output buffer state; out->cur is the write position, out->end the
//         limit, and (on the '+' side) out->opts supplies effort,
//         allow_invalid and inv_repl.
// value - the text to write.
// size  - passed to xml_str_len() to size the escaped output.
//
// On the '+' side of this diff the leading '&' and trailing ';' are written
// inside each case instead of once around the switch, so that the default
// branch can emit a replacement string, or nothing at all.
//
// NOTE(review): the copy loop stops at the first NUL in value rather than after
// size bytes, while the space reserved is computed from size - confirm callers
// always pass NUL-terminated text of exactly that length.
inline static void
dump_str_value(Out out, const char *value, size_t size) {
// xml_str_len() is defined elsewhere; presumably it returns the escaped length
// derived from xml_friendly_chars - confirm.
size_t xsize = xml_str_len((const uchar*)value, size);
-
+
// Grow when the remaining space is not strictly larger than xsize.
if (out->end - out->cur <= (long)xsize) {
grow(out, xsize);
}
for (; '\0' != *value; value++) {
// A table entry of '1' means the byte is copied through unchanged.
if ('1' == xml_friendly_chars[(uchar)*value]) {
*out->cur++ = *value;
} else {
- *out->cur++ = '&';
switch (*value) {
// Written out as &quot;
case '"':
+ *out->cur++ = '&';
*out->cur++ = 'q';
*out->cur++ = 'u';
*out->cur++ = 'o';
*out->cur++ = 't';
+ *out->cur++ = ';';
break;
// Written out as &amp;
case '&':
+ *out->cur++ = '&';
*out->cur++ = 'a';
*out->cur++ = 'm';
*out->cur++ = 'p';
+ *out->cur++ = ';';
break;
// Written out as &apos;
case '\'':
+ *out->cur++ = '&';
*out->cur++ = 'a';
*out->cur++ = 'p';
*out->cur++ = 'o';
*out->cur++ = 's';
+ *out->cur++ = ';';
break;
// Written out as &lt;
case '<':
+ *out->cur++ = '&';
*out->cur++ = 'l';
*out->cur++ = 't';
+ *out->cur++ = ';';
break;
// Written out as &gt;
case '>':
+ *out->cur++ = '&';
*out->cur++ = 'g';
*out->cur++ = 't';
+ *out->cur++ = ';';
break;
default:
- *out->cur++ = '#';
- *out->cur++ = 'x';
- *out->cur++ = '0';
- *out->cur++ = '0';
- dump_hex(*value, out);
+ // Must be one of the invalid characters.
// Strict effort rejects the character; rb_raise() does not return.
// NOTE(review): *value is a plain char passed for %02x - confirm it can
// never be negative here.
+ if (StrictEffort == out->opts->effort) {
+ rb_raise(rb_eSyntaxError, "'\\#x%02x' is not a valid XML character.", *value);
+ }
// With allow_invalid the byte is written as a numeric character reference:
// "&#x00", then the output of dump_hex() (defined elsewhere; presumably the
// hex digits of the byte - confirm), then ';'.
+ if (Yes == out->opts->allow_invalid) {
+ *out->cur++ = '&';
+ *out->cur++ = '#';
+ *out->cur++ = 'x';
+ *out->cur++ = '0';
+ *out->cur++ = '0';
+ dump_hex(*value, out);
+ *out->cur++ = ';';
+ } else if ('\0' != *out->opts->inv_repl) {
+ // If the empty string then ignore. The first character of
+ // the replacement is the length.
// NOTE(review): the length byte is a plain char cast to size_t - confirm it
// is always within 0..10 so the cast cannot produce a huge count.
+ memcpy(out->cur, out->opts->inv_repl + 1, (size_t)*out->opts->inv_repl);
+ out->cur += *out->opts->inv_repl;
+ }
break;
}
- *out->cur++ = ';';
}
}
// Terminate the output; cur is left pointing at the NUL.
*out->cur = '\0';
}
@@ -1014,11 +1037,11 @@
*out->cur++ = '<';
fill_value(out, name, nlen);
if (Qnil != attrs) {
rb_hash_foreach(attrs, dump_gen_attr, (VALUE)out);
}
- if (Qnil != nodes) {
+ if (Qnil != nodes && 0 < RARRAY_LEN(nodes)) {
int do_indent;
*out->cur++ = '>';
do_indent = dump_gen_nodes(nodes, depth, out);
if (out->end - out->cur <= (long)size) {
@@ -1197,9 +1220,10 @@
out->obj = obj;
if (Yes == copts->circular) {
ox_cache8_new(&out->circ_cache);
}
out->indent = copts->indent;
+
if (ox_document_clas == clas) {
dump_gen_doc(obj, -1, out);
} else if (ox_element_clas == clas) {
dump_gen_element(obj, 0, out);
} else {