ext/json/ext/parser/parser.rl in json_pure-1.5.3 vs ext/json/ext/parser/parser.rl in json_pure-1.5.4
- old
+ new
@@ -1,10 +1,10 @@
#include "parser.h"
/* unicode */
-static const char digit_values[256] = {
+static const char digit_values[256] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1,
-1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
@@ -36,11 +36,11 @@
result = (result << 4) | b;
if (b < 0) return UNI_REPLACEMENT_CHAR;
return result;
}
-static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
+static int convert_UTF32_to_UTF8(char *buf, UTF32 ch)
{
int len = 1;
if (ch <= 0x7F) {
buf[0] = (char) ch;
} else if (ch <= 0x07FF) {
@@ -65,21 +65,22 @@
}
#ifdef HAVE_RUBY_ENCODING_H
static VALUE CEncoding_ASCII_8BIT, CEncoding_UTF_8, CEncoding_UTF_16BE,
CEncoding_UTF_16LE, CEncoding_UTF_32BE, CEncoding_UTF_32LE;
-static ID i_encoding, i_encode, i_encode_bang, i_force_encoding;
+static ID i_encoding, i_encode;
#else
static ID i_iconv;
#endif
static VALUE mJSON, mExt, cParser, eParserError, eNestingError;
static VALUE CNaN, CInfinity, CMinusInfinity;
static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
- i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, i_object_class,
- i_array_class, i_key_p, i_deep_const_get, i_match, i_match_string, i_aset, i_leftshift;
+ i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, i_quirks_mode,
+ i_object_class, i_array_class, i_key_p, i_deep_const_get, i_match,
+ i_match_string, i_aset, i_leftshift;
%%{
machine JSON_common;
cr = '\n';
@@ -95,11 +96,11 @@
Vfalse = 'false';
Vtrue = 'true';
VNaN = 'NaN';
VInfinity = 'Infinity';
VMinusInfinity = '-Infinity';
- begin_value = [nft"\-[{NI] | digit;
+ begin_value = [nft\"\-\[\{NI] | digit;
begin_object = '{';
end_object = '}';
begin_array = '[';
end_array = ']';
begin_string = '"';
@@ -113,18 +114,18 @@
write data;
action parse_value {
VALUE v = Qnil;
- char *np = JSON_parse_value(json, fpc, pe, &v);
+ char *np = JSON_parse_value(json, fpc, pe, &v);
if (np == NULL) {
fhold; fbreak;
} else {
if (NIL_P(json->object_class)) {
- rb_hash_aset(*result, last_name, v);
+ rb_hash_aset(*result, last_name, v);
} else {
- rb_funcall(*result, i_aset, 2, last_name, v);
+ rb_funcall(*result, i_aset, 2, last_name, v);
}
fexec np;
}
}
@@ -136,17 +137,18 @@
if (np == NULL) { fhold; fbreak; } else fexec np;
}
action exit { fhold; fbreak; }
- a_pair = ignore* begin_name >parse_name
- ignore* name_separator ignore*
- begin_value >parse_value;
+ pair = ignore* begin_name >parse_name ignore* name_separator ignore* begin_value >parse_value;
+ next_pair = ignore* value_separator pair;
- main := begin_object
- (a_pair (ignore* value_separator a_pair)*)?
- ignore* end_object @exit;
+ main := (
+ begin_object
+ (pair (next_pair)*)? ignore*
+ end_object
+ ) @exit;
}%%
static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result)
{
int cs = EVIL;
@@ -176,10 +178,11 @@
} else {
return NULL;
}
}
+
%%{
machine JSON_value;
include JSON_common;
write data;
@@ -212,11 +215,11 @@
if (np == NULL) { fhold; fbreak; } else fexec np;
}
action parse_number {
char *np;
- if(pe > fpc + 9 && !strncmp(MinusInfinity, fpc, 9)) {
+ if(pe > fpc + 9 - json->quirks_mode && !strncmp(MinusInfinity, fpc, 9)) {
if (json->allow_nan) {
*result = CMinusInfinity;
fexec p + 10;
fhold; fbreak;
} else {
@@ -228,19 +231,19 @@
np = JSON_parse_integer(json, fpc, pe, result);
if (np != NULL) fexec np;
fhold; fbreak;
}
- action parse_array {
+ action parse_array {
char *np;
json->current_nesting++;
np = JSON_parse_array(json, fpc, pe, result);
json->current_nesting--;
if (np == NULL) { fhold; fbreak; } else fexec np;
}
- action parse_object {
+ action parse_object {
char *np;
json->current_nesting++;
np = JSON_parse_object(json, fpc, pe, result);
json->current_nesting--;
if (np == NULL) { fhold; fbreak; } else fexec np;
@@ -280,11 +283,11 @@
write data;
action exit { fhold; fbreak; }
- main := '-'? ('0' | [1-9][0-9]*) (^[0-9] @exit);
+ main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? @exit);
}%%
static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
{
int cs = EVIL;
@@ -311,11 +314,11 @@
action exit { fhold; fbreak; }
main := '-'? (
(('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?)
| (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+))
- ) (^[0-9Ee.\-] @exit );
+ ) (^[0-9Ee.\-]? @exit );
}%%
static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
{
int cs = EVIL;
@@ -340,11 +343,11 @@
write data;
action parse_value {
VALUE v = Qnil;
- char *np = JSON_parse_value(json, fpc, pe, &v);
+ char *np = JSON_parse_value(json, fpc, pe, &v);
if (np == NULL) {
fhold; fbreak;
} else {
if (NIL_P(json->array_class)) {
rb_ary_push(*result, v);
@@ -417,11 +420,11 @@
break;
case 'f':
unescape = (char *) "\f";
break;
case 'u':
- if (pe > stringEnd - 4) {
+ if (pe > stringEnd - 4) {
return Qnil;
} else {
char buf[4];
UTF32 ch = unescape_unicode((unsigned char *) ++pe);
pe += 3;
@@ -473,11 +476,11 @@
}
}
action exit { fhold; fbreak; }
- main := '"' ((^(["\\] | 0..0x1f) | '\\'["\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^(["\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit;
+ main := '"' ((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit;
}%%
static int
match_i(VALUE regexp, VALUE klass, VALUE memo)
{
@@ -519,39 +522,11 @@
} else {
return NULL;
}
}
-
-%%{
- machine JSON;
-
- write data;
-
- include JSON_common;
-
- action parse_object {
- char *np;
- json->current_nesting = 1;
- np = JSON_parse_object(json, fpc, pe, &result);
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- action parse_array {
- char *np;
- json->current_nesting = 1;
- np = JSON_parse_array(json, fpc, pe, &result);
- if (np == NULL) { fhold; fbreak; } else fexec np;
- }
-
- main := ignore* (
- begin_object >parse_object |
- begin_array >parse_array
- ) ignore*;
-}%%
-
-/*
+/*
* Document-class: JSON::Ext::Parser
*
* This is the JSON parser implemented as a C extension. It can be configured
* to be used by setting
*
@@ -571,26 +546,19 @@
#ifdef HAVE_RUBY_ENCODING_H
{
VALUE encoding = rb_funcall(source, i_encoding, 0);
if (encoding == CEncoding_ASCII_8BIT) {
if (len >= 4 && ptr[0] == 0 && ptr[1] == 0 && ptr[2] == 0) {
- source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32BE);
- source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
+ source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_32BE);
} else if (len >= 4 && ptr[0] == 0 && ptr[2] == 0) {
- source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16BE);
- source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
+ source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_16BE);
} else if (len >= 4 && ptr[1] == 0 && ptr[2] == 0 && ptr[3] == 0) {
- source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_32LE);
- source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
+ source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_32LE);
} else if (len >= 4 && ptr[1] == 0 && ptr[3] == 0) {
- source = rb_str_dup(source);
- rb_funcall(source, i_force_encoding, 1, CEncoding_UTF_16LE);
- source = rb_funcall(source, i_encode_bang, 1, CEncoding_UTF_8);
+ source = rb_funcall(source, i_encode, 2, CEncoding_UTF_8, CEncoding_UTF_16LE);
} else {
+ source = rb_str_dup(source);
FORCE_UTF8(source);
}
} else {
source = rb_funcall(source, i_encode, 1, CEncoding_UTF_8);
}
@@ -635,18 +603,17 @@
* * *object_class*: Defaults to Hash
* * *array_class*: Defaults to Array
*/
static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
{
- char *ptr;
- long len;
VALUE source, opts;
- GET_PARSER;
+ GET_PARSER_INIT;
+
+ if (json->Vsource) {
+ rb_raise(rb_eTypeError, "already initialized instance");
+ }
rb_scan_args(argc, argv, "11", &source, &opts);
- source = convert_encoding(StringValue(source));
- ptr = RSTRING_PTR(source);
- len = RSTRING_LEN(source);
if (!NIL_P(opts)) {
opts = rb_convert_type(opts, T_HASH, "Hash", "to_hash");
if (NIL_P(opts)) {
rb_raise(rb_eArgError, "opts needs to be like a hash");
} else {
@@ -672,10 +639,17 @@
if (option_given_p(opts, tmp)) {
json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
} else {
json->symbolize_names = 0;
}
+ tmp = ID2SYM(i_quirks_mode);
+ if (option_given_p(opts, tmp)) {
+ VALUE quirks_mode = rb_hash_aref(opts, tmp);
+ json->quirks_mode = RTEST(quirks_mode) ? 1 : 0;
+ } else {
+ json->quirks_mode = 0;
+ }
tmp = ID2SYM(i_create_additions);
if (option_given_p(opts, tmp)) {
json->create_additions = RTEST(rb_hash_aref(opts, tmp));
} else {
json->create_additions = 1;
@@ -712,24 +686,48 @@
json->create_additions = 1;
json->create_id = rb_funcall(mJSON, i_create_id, 0);
json->object_class = Qnil;
json->array_class = Qnil;
}
+ if (!json->quirks_mode) {
+ source = convert_encoding(StringValue(source));
+ }
json->current_nesting = 0;
- json->len = len;
- json->source = ptr;
+ json->len = RSTRING_LEN(source);
+ json->source = RSTRING_PTR(source);;
json->Vsource = source;
return self;
}
-/*
- * call-seq: parse()
- *
- * Parses the current JSON text _source_ and returns the complete data
- * structure as a result.
- */
-static VALUE cParser_parse(VALUE self)
+%%{
+ machine JSON;
+
+ write data;
+
+ include JSON_common;
+
+ action parse_object { /* entering action for begin_object: parse a top-level object into result */
+ char *np; /* position at which to resume scanning, or NULL */
+ json->current_nesting = 1; /* nesting depth is reset to 1 for the outermost container */
+ np = JSON_parse_object(json, fpc, pe, &result);
+ if (np == NULL) { fhold; fbreak; } else fexec np; /* NULL: halt on the current character; otherwise continue at np */
+ }
+
+ action parse_array { /* entering action for begin_array: parse a top-level array into result */
+ char *np; /* position at which to resume scanning, or NULL */
+ json->current_nesting = 1; /* nesting depth is reset to 1 for the outermost container */
+ np = JSON_parse_array(json, fpc, pe, &result);
+ if (np == NULL) { fhold; fbreak; } else fexec np; /* NULL: halt on the current character; otherwise continue at np */
+ }
+
+ main := ignore* (
+ begin_object >parse_object |
+ begin_array >parse_array
+ ) ignore*;
+}%%
+
+static VALUE cParser_parse_strict(VALUE self)
{
char *p, *pe;
int cs = EVIL;
VALUE result = Qnil;
GET_PARSER;
@@ -745,10 +743,66 @@
rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
return Qnil;
}
}
+/*
+ * Ragel machine used in quirks mode. Its main rule accepts input matched by
+ * the `ignore` rule of JSON_common, then a single value introduced by any
+ * begin_value character, then `ignore` input again. The value itself is
+ * parsed by JSON_parse_value, which stores it in `result`. Machine JSON, by
+ * contrast, only accepts begin_object or begin_array at this position.
+ */
+%%{
+ machine JSON_quirks_mode;
+
+ write data;
+
+ include JSON_common;
+
+ action parse_value { /* entering action for begin_value */
+ char *np = JSON_parse_value(json, fpc, pe, &result); /* np: position to resume at, or NULL */
+ if (np == NULL) { fhold; fbreak; } else fexec np; /* NULL: halt on the current character; otherwise continue at np */
+ }
+
+ main := ignore* (
+ begin_value >parse_value
+ ) ignore*;
+}%%
+/*
+ * Quirks-mode parse routine, called by cParser_parse when json->quirks_mode
+ * is set. Runs the JSON_quirks_mode machine over the source buffer
+ * (json->len bytes starting at json->source).
+ *
+ * Returns the parsed value when the machine ends in a final state with all
+ * input consumed (p == pe). Otherwise raises eParserError whose message
+ * includes the remaining text at the position where parsing stopped.
+ */
+static VALUE cParser_parse_quirks_mode(VALUE self)
+{
+ char *p, *pe;
+ int cs = EVIL;
+ VALUE result = Qnil;
+ GET_PARSER; /* macro defined elsewhere; presumably binds `json` to this instance's JSON_Parser -- confirm in parser.h */
+
+ %% write init;
+ p = json->source; /* p: current scan position, starts at the first byte */
+ pe = p + json->len; /* pe: one past the last byte of the source */
+ %% write exec;
+
+ if (cs >= JSON_quirks_mode_first_final && p == pe) {
+ return result;
+ } else {
+ rb_raise(eParserError, "%u: unexpected token at '%s'", __LINE__, p);
+ return Qnil; /* rb_raise does not return; presumably kept so every path has a return */
+ }
+}
+
+/*
+ * call-seq: parse()
+ *
+ * Parses the current JSON text _source_ and returns the complete data
+ * structure as a result.
+ *
+ * Dispatches on the parser's quirks_mode flag: when it is set, parsing is
+ * done by cParser_parse_quirks_mode, otherwise by cParser_parse_strict.
+ */
+static VALUE cParser_parse(VALUE self)
+{
+ GET_PARSER; /* macro defined elsewhere; presumably binds `json` to this instance's JSON_Parser -- confirm in parser.h */
+
+ if (json->quirks_mode) {
+ return cParser_parse_quirks_mode(self);
+ } else {
+ return cParser_parse_strict(self);
+ }
+}
+
+
static JSON_Parser *JSON_allocate()
{
JSON_Parser *json = ALLOC(JSON_Parser);
MEMZERO(json, JSON_Parser, 1);
return json;
@@ -784,10 +838,22 @@
{
GET_PARSER;
return rb_str_dup(json->Vsource);
}
+/*
+ * call-seq: quirks_mode?()
+ *
+ * Returns true if this parser is in quirks_mode, false otherwise.
+ *
+ * The flag is the one initialize stores from the quirks_mode option when an
+ * options hash is given.
+ */
+static VALUE cParser_quirks_mode_p(VALUE self)
+{
+ GET_PARSER; /* macro defined elsewhere; presumably binds `json` to this instance's JSON_Parser -- confirm in parser.h */
+ return json->quirks_mode ? Qtrue : Qfalse; /* map the C int flag to a Ruby boolean */
+}
+
+
void Init_parser()
{
rb_require("json/common");
mJSON = rb_define_module("JSON");
mExt = rb_define_module_under(mJSON, "Ext");
@@ -796,10 +862,11 @@
eNestingError = rb_path2class("JSON::NestingError");
rb_define_alloc_func(cParser, cJSON_parser_s_allocate);
rb_define_method(cParser, "initialize", cParser_initialize, -1);
rb_define_method(cParser, "parse", cParser_parse, 0);
rb_define_method(cParser, "source", cParser_source, 0);
+ rb_define_method(cParser, "quirks_mode?", cParser_quirks_mode_p, 0);
CNaN = rb_const_get(mJSON, rb_intern("NaN"));
CInfinity = rb_const_get(mJSON, rb_intern("Infinity"));
CMinusInfinity = rb_const_get(mJSON, rb_intern("MinusInfinity"));
@@ -809,10 +876,11 @@
i_create_additions = rb_intern("create_additions");
i_chr = rb_intern("chr");
i_max_nesting = rb_intern("max_nesting");
i_allow_nan = rb_intern("allow_nan");
i_symbolize_names = rb_intern("symbolize_names");
+ i_quirks_mode = rb_intern("quirks_mode");
i_object_class = rb_intern("object_class");
i_array_class = rb_intern("array_class");
i_match = rb_intern("match");
i_match_string = rb_intern("match_string");
i_key_p = rb_intern("key?");
@@ -826,11 +894,17 @@
CEncoding_UTF_32BE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32be"));
CEncoding_UTF_32LE = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("utf-32le"));
CEncoding_ASCII_8BIT = rb_funcall(rb_path2class("Encoding"), rb_intern("find"), 1, rb_str_new2("ascii-8bit"));
i_encoding = rb_intern("encoding");
i_encode = rb_intern("encode");
- i_encode_bang = rb_intern("encode!");
- i_force_encoding = rb_intern("force_encoding");
#else
i_iconv = rb_intern("iconv");
#endif
}
+
+/*
+ * Local variables:
+ * mode: c
+ * c-file-style: ruby
+ * indent-tabs-mode: nil
+ * End:
+ */