ext/oj/fast.c in oj-2.12.1 vs ext/oj/fast.c in oj-2.12.2

- old
+ new

@@ -168,11 +168,11 @@ } inline static void leaf_init(Leaf leaf, int type) { leaf->next = 0; - leaf->type = type; + leaf->rtype = type; leaf->parent_type = T_NONE; switch (type) { case T_ARRAY: case T_HASH: leaf->elements = 0; @@ -204,10 +204,12 @@ Leaf leaf; if (0 == doc->batches || BATCH_SIZE == doc->batches->next_avail) { Batch b = ALLOC(struct _Batch); + // Initializes all leaves with a NO_VAL value_type + memset(b, 0, sizeof(struct _Batch)); b->next = doc->batches; doc->batches = b; b->next_avail = 0; } leaf = &doc->batches->leaves[doc->batches->next_avail]; @@ -230,11 +232,11 @@ } static VALUE leaf_value(Doc doc, Leaf leaf) { if (RUBY_VAL != leaf->value_type) { - switch (leaf->type) { + switch (leaf->rtype) { case T_NIL: leaf->value = Qnil; break; case T_TRUE: leaf->value = Qtrue; @@ -258,11 +260,11 @@ break; case T_HASH: return leaf_hash_value(doc, leaf); break; default: - rb_raise(rb_const_get_at(Oj, rb_intern("Error")), "Unexpected type %02x.", leaf->type); + rb_raise(rb_const_get_at(Oj, rb_intern("Error")), "Unexpected type %02x.", leaf->rtype); break; } } return leaf->value; } @@ -346,83 +348,15 @@ leaf->value = LONG2NUM(n); } leaf->value_type = RUBY_VAL; } -#ifdef JRUBY_RUBY static void leaf_float_value(Leaf leaf) { - char *s = leaf->str; - int64_t n = 0; - long a = 0; - long div = 1; - long e = 0; - int neg = 0; - int eneg = 0; - int big = 0; - - if ('-' == *s) { - s++; - neg = 1; - } else if ('+' == *s) { - s++; - } - for (; '0' <= *s && *s <= '9'; s++) { - n = n * 10 + (*s - '0'); - if (NUM_MAX <= n) { - big = 1; - } - } - if (big) { - char c = *s; - - *s = '\0'; - leaf->value = rb_cstr_to_inum(leaf->str, 10, 0); - *s = c; - } else { - double d; - - if ('.' == *s) { - s++; - for (; '0' <= *s && *s <= '9'; s++) { - a = a * 10 + (*s - '0'); - div *= 10; - } - } - if ('e' == *s || 'E' == *s) { - s++; - if ('-' == *s) { - s++; - eneg = 1; - } else if ('+' == *s) { - s++; - } - for (; '0' <= *s && *s <= '9'; s++) { - e = e * 10 + (*s - '0'); - } - } - d = (double)n + (double)a / (double)div; - if (neg) { - d = -d; - } - if (0 != e) { - if (eneg) { - e = -e; - } - d *= pow(10.0, e); - } - leaf->value = rb_float_new(d); - } - leaf->value_type = RUBY_VAL; -} -#else -static void -leaf_float_value(Leaf leaf) { leaf->value = rb_float_new(rb_cstr_to_dbl(leaf->str, 1)); leaf->value_type = RUBY_VAL; } -#endif static VALUE leaf_array_value(Doc doc, Leaf leaf) { VALUE a = rb_ary_new(); @@ -673,37 +607,63 @@ pi->s += 3; return leaf; } -static char -read_hex(ParseInfo pi, char *h) { - uint8_t b = 0; +static uint32_t +read_4hex(ParseInfo pi, const char *h) { + uint32_t b = 0; + int i; - if ('0' <= *h && *h <= '9') { - b = *h - '0'; - } else if ('A' <= *h && *h <= 'F') { - b = *h - 'A' + 10; - } else if ('a' <= *h && *h <= 'f') { - b = *h - 'a' + 10; - } else { - pi->s = h; - raise_error("invalid hex character", pi->str, pi->s); + for (i = 0; i < 4; i++, h++) { + b = b << 4; + if ('0' <= *h && *h <= '9') { + b += *h - '0'; + } else if ('A' <= *h && *h <= 'F') { + b += *h - 'A' + 10; + } else if ('a' <= *h && *h <= 'f') { + b += *h - 'a' + 10; + } else { + raise_error("invalid hex character", pi->str, pi->s); + } } - h++; - b = b << 4; - if ('0' <= *h && *h <= '9') { - b += *h - '0'; - } else if ('A' <= *h && *h <= 'F') { - b += *h - 'A' + 10; - } else if ('a' <= *h && *h <= 'f') { - b += *h - 'a' + 10; + return b; +} + +static char* +unicode_to_chars(ParseInfo pi, char *t, uint32_t code) { + if (0x0000007F >= code) { + *t++ = (char)code; + } else if (0x000007FF >= code) { + *t++ = 0xC0 | (code >> 6); + *t++ = 0x80 | (0x3F & code); + } else if (0x0000FFFF >= code) { + *t++ = 0xE0 | (code >> 12); + *t++ = 0x80 | ((code >> 6) & 0x3F); + *t++ = 0x80 | (0x3F & code); + } else if (0x001FFFFF >= code) { + *t++ = 0xF0 | (code >> 18); + *t++ = 0x80 | ((code >> 12) & 0x3F); + *t++ = 0x80 | ((code >> 6) & 0x3F); + *t++ = 0x80 | (0x3F & code); + } else if (0x03FFFFFF >= code) { + *t++ = 0xF8 | (code >> 24); + *t++ = 0x80 | ((code >> 18) & 0x3F); + *t++ = 0x80 | ((code >> 12) & 0x3F); + *t++ = 0x80 | ((code >> 6) & 0x3F); + *t++ = 0x80 | (0x3F & code); + } else if (0x7FFFFFFF >= code) { + *t++ = 0xFC | (code >> 30); + *t++ = 0x80 | ((code >> 24) & 0x3F); + *t++ = 0x80 | ((code >> 18) & 0x3F); + *t++ = 0x80 | ((code >> 12) & 0x3F); + *t++ = 0x80 | ((code >> 6) & 0x3F); + *t++ = 0x80 | (0x3F & code); } else { - pi->s = h; - raise_error("invalid hex character", pi->str, pi->s); + raise_error("invalid Unicode character", pi->str, pi->s); } - return (char)b; + return t; } /* Assume the value starts immediately and goes until the quote character is * reached again. Do not read the character after the terminating quote. */ @@ -729,20 +689,35 @@ case 'f': *t = '\f'; break; case 'b': *t = '\b'; break; case '"': *t = '"'; break; case '/': *t = '/'; break; case '\\': *t = '\\'; break; - case 'u': + case 'u': { + uint32_t code; + h++; - *t = read_hex(pi, h); - h += 2; - if ('\0' != *t) { - t++; + code = read_4hex(pi, h); + h += 3; + if (0x0000D800 <= code && code <= 0x0000DFFF) { + uint32_t c1 = (code - 0x0000D800) & 0x000003FF; + uint32_t c2; + + h++; + if ('\\' != *h || 'u' != *(h + 1)) { + pi->s = h; + raise_error("invalid escaped character", pi->str, pi->s); + } + h += 2; + c2 = read_4hex(pi, h); + h += 3; + c2 = (c2 - 0x0000DC00) & 0x000003FF; + code = ((c1 << 10) | c2) + 0x00010000; } - *t = read_hex(pi, h); - h++; + t = unicode_to_chars(pi, t, code); + t--; break; + } default: pi->s = h; raise_error("invalid escaped character", pi->str, pi->s); break; } @@ -757,19 +732,14 @@ } // doc support functions inline static void doc_init(Doc doc) { + memset(doc, 0, sizeof(struct _Doc)); doc->where = doc->where_path; - *doc->where = 0; - doc->data = 0; doc->self = Qundef; - doc->size = 0; - doc->json = 0; doc->batches = &doc->batch0; - doc->batch0.next = 0; - doc->batch0.next_avail = 0; } static void doc_free(Doc doc) { if (0 != doc) { @@ -883,11 +853,11 @@ size_t cnt = doc->where - doc->where_path; if (MAX_STACK <= cnt) { rb_raise(rb_const_get_at(Oj, rb_intern("DepthError")), "Path too deep. Limit is %d levels.", MAX_STACK); } - memcpy(stack, doc->where_path, sizeof(Leaf) * cnt); + memcpy(stack, doc->where_path, sizeof(Leaf) * (cnt + 1)); lp = stack + cnt; } return get_leaf(stack, lp, path); } return leaf; @@ -912,11 +882,11 @@ return 0; } } else if (COL_VAL == leaf->value_type && 0 != leaf->elements) { Leaf first = leaf->elements->next; Leaf e = first; - int type = leaf->type; + int type = leaf->rtype; leaf = 0; if (T_ARRAY == type) { int cnt = 0; @@ -1019,11 +989,11 @@ } } else if (COL_VAL == leaf->value_type && 0 != leaf->elements) { Leaf first = leaf->elements->next; Leaf e = first; - if (T_ARRAY == leaf->type) { + if (T_ARRAY == leaf->rtype) { int cnt = 0; for (; '0' <= *path && *path <= '9'; path++) { cnt = cnt * 10 + (*path - '0'); } @@ -1044,11 +1014,11 @@ break; } cnt--; e = e->next; } while (e != first); - } else if (T_HASH == leaf->type) { + } else if (T_HASH == leaf->rtype) { const char *key = path; const char *slash = strchr(path, '/'); int klen; if (0 == slash) { @@ -1300,11 +1270,11 @@ if (1 <= argc) { Check_Type(*argv, T_STRING); path = StringValuePtr(*argv); } if (0 != (leaf = get_doc_leaf(doc, path))) { - switch (leaf->type) { + switch (leaf->rtype) { case T_NIL: type = rb_cNilClass; break; case T_TRUE: type = rb_cTrueClass; break; case T_FALSE: type = rb_cFalseClass; break; case T_STRING: type = rb_cString; break; case T_FIXNUM: type = rb_cFixnum; break; @@ -1373,29 +1343,29 @@ const char *path = 0; size_t wlen; wlen = doc->where - doc->where_path; if (0 < wlen) { - memcpy(save_path, doc->where_path, sizeof(Leaf) * wlen); + memcpy(save_path, doc->where_path, sizeof(Leaf) * (wlen + 1)); } if (1 <= argc) { Check_Type(*argv, T_STRING); path = StringValuePtr(*argv); if ('/' == *path) { doc->where = doc->where_path; path++; } if (0 != move_step(doc, path, 1)) { if (0 < wlen) { - memcpy(doc->where_path, save_path, sizeof(Leaf) * wlen); + memcpy(doc->where_path, save_path, sizeof(Leaf) * (wlen + 1)); } return Qnil; } } each_leaf(doc, self); if (0 < wlen) { - memcpy(doc->where_path, save_path, sizeof(Leaf) * wlen); + memcpy(doc->where_path, save_path, sizeof(Leaf) * (wlen + 1)); } } return Qnil; } @@ -1449,22 +1419,22 @@ const char *path = 0; size_t wlen; wlen = doc->where - doc->where_path; if (0 < wlen) { - memcpy(save_path, doc->where_path, sizeof(Leaf) * wlen); + memcpy(save_path, doc->where_path, sizeof(Leaf) * (wlen + 1)); } if (1 <= argc) { Check_Type(*argv, T_STRING); path = StringValuePtr(*argv); if ('/' == *path) { doc->where = doc->where_path; path++; } if (0 != move_step(doc, path, 1)) { if (0 < wlen) { - memcpy(doc->where_path, save_path, sizeof(Leaf) * wlen); + memcpy(doc->where_path, save_path, sizeof(Leaf) * (wlen + 1)); } return Qnil; } } if (COL_VAL == (*doc->where)->value_type && 0 != (*doc->where)->elements) { @@ -1477,10 +1447,10 @@ rb_yield(self); e = e->next; } while (e != first); } if (0 < wlen) { - memcpy(doc->where_path, save_path, sizeof(Leaf) * wlen); + memcpy(doc->where_path, save_path, sizeof(Leaf) * (wlen + 1)); } } return Qnil; }