ext/json/ext/parser/parser.rl in json_pure-1.8.6 vs ext/json/ext/parser/parser.rl in json_pure-2.0.0

- old
+ new

@@ -1,10 +1,10 @@ #include "../fbuffer/fbuffer.h" #include "parser.h" #if defined HAVE_RUBY_ENCODING_H -# define EXC_ENCODING UTF_8, +# define EXC_ENCODING rb_utf8_encoding(), # ifndef HAVE_RB_ENC_RAISE static void enc_raise(rb_encoding *enc, VALUE exc, const char *fmt, ...) { va_list args; @@ -85,21 +85,15 @@ buf[0] = '?'; } return len; } -#ifdef HAVE_RUBY_ENCODING_H -static rb_encoding *UTF_8, *UTF_16BE, *UTF_16LE, *UTF_32BE, *UTF_32LE; -#else -static ID i_iconv; -#endif - static VALUE mJSON, mExt, cParser, eParserError, eNestingError; static VALUE CNaN, CInfinity, CMinusInfinity; static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, i_quirks_mode, + i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, i_object_class, i_array_class, i_key_p, i_deep_const_get, i_match, i_match_string, i_aset, i_aref, i_leftshift; %%{ machine JSON_common; @@ -241,11 +235,11 @@ if (np == NULL) { fhold; fbreak; } else fexec np; } action parse_number { char *np; - if(pe > fpc + 9 - json->quirks_mode && !strncmp(MinusInfinity, fpc, 9)) { + if(pe > fpc + 8 && !strncmp(MinusInfinity, fpc, 9)) { if (json->allow_nan) { *result = CMinusInfinity; fexec p + 10; fhold; fbreak; } else { @@ -275,21 +269,21 @@ if (np == NULL) { fhold; fbreak; } else fexec np; } action exit { fhold; fbreak; } -main := ( +main := ignore* ( Vnull @parse_null | Vfalse @parse_false | Vtrue @parse_true | VNaN @parse_nan | VInfinity @parse_infinity | begin_number >parse_number | begin_string >parse_string | begin_array >parse_array | begin_object >parse_object - ) %*exit; + ) ignore* %*exit; }%% static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; @@ -546,10 +540,12 @@ } } if (json->symbolize_names && json->parsing_name) { *result = rb_str_intern(*result); + } else { + rb_str_resize(*result, RSTRING_LEN(*result)); } if (cs >= JSON_string_first_final) { return p + 1; } else { return NULL; @@ -568,45 +564,17 @@ * */ static VALUE convert_encoding(VALUE source) { - const char *ptr = RSTRING_PTR(source); - long len = RSTRING_LEN(source); - if (len < 2) { - rb_raise(eParserError, "A JSON text must at least contain two octets!"); - } #ifdef HAVE_RUBY_ENCODING_H - { - rb_encoding *enc = rb_enc_get(source); - if (enc == rb_ascii8bit_encoding()) { - if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { - source = rb_str_conv_enc(source, UTF_32BE, rb_utf8_encoding()); - } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { - source = rb_str_conv_enc(source, UTF_16BE, rb_utf8_encoding()); - } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { - source = rb_str_conv_enc(source, UTF_32LE, rb_utf8_encoding()); - } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { - source = rb_str_conv_enc(source, UTF_16LE, rb_utf8_encoding()); - } else { - source = rb_str_dup(source); - FORCE_UTF8(source); - } - } else { - source = rb_str_conv_enc(source, rb_enc_get(source), rb_utf8_encoding()); - } - } -#else - if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) { - source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32be"), source); - } else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) { - source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16be"), source); - } else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) { - source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-32le"), source); - } else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) { - source = rb_funcall(mJSON, i_iconv, 3, rb_str_new2("utf-8"), rb_str_new2("utf-16le"), source); - } + rb_encoding *enc = rb_enc_get(source); + if (enc == rb_ascii8bit_encoding()) { + FORCE_UTF8(source); + } else { + source = rb_str_conv_enc(source, NULL, rb_utf8_encoding()); + } #endif return source; } /* @@ -625,12 +593,13 @@ * defaults to 100. * * *allow_nan*: If set to true, allow NaN, Infinity and -Infinity in * defiance of RFC 4627 to be parsed by the Parser. This option defaults to * false. * * *symbolize_names*: If set to true, returns symbols for the names - * (keys) in a JSON object. Otherwise strings are returned, which is also - * the default. + * (keys) in a JSON object. Otherwise strings are returned, which is + * also the default. It's not possible to use this option in + * conjunction with the *create_additions* option. * * *create_additions*: If set to false, the Parser doesn't create * additions even if a matching class and create_id was found. This option * defaults to false. * * *object_class*: Defaults to Hash * * *array_class*: Defaults to Array @@ -677,23 +646,21 @@ if (option_given_p(opts, tmp)) { json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; } else { json->symbolize_names = 0; } - tmp = ID2SYM(i_quirks_mode); - if (option_given_p(opts, tmp)) { - VALUE quirks_mode = rb_hash_aref(opts, tmp); - json->quirks_mode = RTEST(quirks_mode) ? 1 : 0; - } else { - json->quirks_mode = 0; - } tmp = ID2SYM(i_create_additions); if (option_given_p(opts, tmp)) { json->create_additions = RTEST(rb_hash_aref(opts, tmp)); } else { json->create_additions = 0; } + if (json->symbolize_names && json->create_additions) { + rb_raise(rb_eArgError, + "options :symbolize_names and :create_additions cannot be " + " used in conjunction"); + } tmp = ID2SYM(i_create_id); if (option_given_p(opts, tmp)) { json->create_id = rb_hash_aref(opts, tmp); } else { json->create_id = rb_funcall(mJSON, i_create_id, 0); @@ -726,15 +693,13 @@ json->create_additions = 1; json->create_id = rb_funcall(mJSON, i_create_id, 0); json->object_class = Qnil; json->array_class = Qnil; } - StringValue(source); - if (!json->quirks_mode) { - source = convert_encoding(source); - } + source = convert_encoding(StringValue(source)); json->current_nesting = 0; + StringValue(source); json->len = RSTRING_LEN(source); json->source = RSTRING_PTR(source);; json->Vsource = source; return self; } @@ -744,102 +709,43 @@ write data; include JSON_common; - action parse_object { - char *np; - json->current_nesting = 1; - np = JSON_parse_object(json, fpc, pe, &result); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - action parse_array { - char *np; - json->current_nesting = 1; - np = JSON_parse_array(json, fpc, pe, &result); - if (np == NULL) { fhold; fbreak; } else fexec np; - } - - main := ignore* ( - begin_object >parse_object | - begin_array >parse_array - ) ignore*; -}%% - -static VALUE cParser_parse_strict(VALUE self) -{ - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - GET_PARSER; - - %% write init; - p = json->source; - pe = p + json->len; - %% write exec; - - if (cs >= JSON_first_final && p == pe) { - return result; - } else { - rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p); - return Qnil; - } -} - - -%%{ - machine JSON_quirks_mode; - - write data; - - include JSON_common; - action parse_value { char *np = JSON_parse_value(json, fpc, pe, &result); if (np == NULL) { fhold; fbreak; } else fexec np; } main := ignore* ( begin_value >parse_value ) ignore*; }%% -static VALUE cParser_parse_quirks_mode(VALUE self) -{ - char *p, *pe; - int cs = EVIL; - VALUE result = Qnil; - GET_PARSER; - - %% write init; - p = json->source; - pe = p + json->len; - %% write exec; - - if (cs >= JSON_quirks_mode_first_final && p == pe) { - return result; - } else { - rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p); - return Qnil; - } -} - /* * call-seq: parse() * * Parses the current JSON text _source_ and returns the complete data * structure as a result. */ static VALUE cParser_parse(VALUE self) { + char *p, *pe; + int cs = EVIL; + VALUE result = Qnil; GET_PARSER; - if (json->quirks_mode) { - return cParser_parse_quirks_mode(self); + %% write init; + p = json->source; + pe = p + json->len; + %% write exec; + + if (cs >= JSON_first_final && p == pe) { + return result; } else { - return cParser_parse_strict(self); + rb_enc_raise(EXC_ENCODING eParserError, "%u: unexpected token at '%s'", __LINE__, p); + return Qnil; } } static void JSON_mark(void *ptr) { @@ -893,22 +799,10 @@ { GET_PARSER; return rb_str_dup(json->Vsource); } -/* - * call-seq: quirks_mode?() - * - * Returns a true, if this parser is in quirks_mode, false otherwise. - */ -static VALUE cParser_quirks_mode_p(VALUE self) -{ - GET_PARSER; - return json->quirks_mode ? Qtrue : Qfalse; -} - - void Init_parser(void) { rb_require("json/common"); mJSON = rb_define_module("JSON"); mExt = rb_define_module_under(mJSON, "Ext"); @@ -917,11 +811,10 @@ eNestingError = rb_path2class("JSON::NestingError"); rb_define_alloc_func(cParser, cJSON_parser_s_allocate); rb_define_method(cParser, "initialize", cParser_initialize, -1); rb_define_method(cParser, "parse", cParser_parse, 0); rb_define_method(cParser, "source", cParser_source, 0); - rb_define_method(cParser, "quirks_mode?", cParser_quirks_mode_p, 0); CNaN = rb_const_get(mJSON, rb_intern("NaN")); CInfinity = rb_const_get(mJSON, rb_intern("Infinity")); CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity")); @@ -931,28 +824,18 @@ i_create_additions = rb_intern("create_additions"); i_chr = rb_intern("chr"); i_max_nesting = rb_intern("max_nesting"); i_allow_nan = rb_intern("allow_nan"); i_symbolize_names = rb_intern("symbolize_names"); - i_quirks_mode = rb_intern("quirks_mode"); i_object_class = rb_intern("object_class"); i_array_class = rb_intern("array_class"); i_match = rb_intern("match"); i_match_string = rb_intern("match_string"); i_key_p = rb_intern("key?"); i_deep_const_get = rb_intern("deep_const_get"); i_aset = rb_intern("[]="); i_aref = rb_intern("[]"); i_leftshift = rb_intern("<<"); -#ifdef HAVE_RUBY_ENCODING_H - UTF_8 = rb_utf8_encoding(); - UTF_16BE = rb_enc_find("utf-16be"); - UTF_16LE = rb_enc_find("utf-16le"); - UTF_32BE = rb_enc_find("utf-32be"); - UTF_32LE = rb_enc_find("utf-32le"); -#else - i_iconv = rb_intern("iconv"); -#endif } /* * Local variables: * mode: c