java/src/json/ext/Parser.rl in json_pure-1.5.3 vs java/src/json/ext/Parser.rl in json_pure-1.5.4

- old
+ new

@@ -1,8 +1,8 @@ /* * This code is copyrighted work by Daniel Luz <dev at mernen dot com>. - * + * * Distributed under the Ruby and GPLv2 licenses; see COPYING and GPL files * for details. */ package json.ext; @@ -27,30 +27,31 @@ import org.jruby.runtime.builtin.IRubyObject; import org.jruby.util.ByteList; /** * The <code>JSON::Ext::Parser</code> class. - * + * * <p>This is the JSON parser implemented as a Java class. To use it as the * standard parser, set * <pre>JSON.parser = JSON::Ext::Parser</pre> * This is performed for you when you <code>include "json/ext"</code>. - * + * * <p>This class does not perform the actual parsing, just acts as an interface * to Ruby code. When the {@link #parse()} method is invoked, a * Parser.ParserSession object is instantiated, which handles the process. - * + * * @author mernen */ public class Parser extends RubyObject { private final RuntimeInfo info; private RubyString vSource; private RubyString createId; private boolean createAdditions; private int maxNesting; private boolean allowNaN; private boolean symbolizeNames; + private boolean quirksMode; private RubyClass objectClass; private RubyClass arrayClass; private RubyHash match_string; private static final int DEFAULT_MAX_NESTING = 19; @@ -67,11 +68,11 @@ } }; /** * Multiple-value return for internal parser methods. - * + * * <p>All the <code>parse<var>Stuff</var></code> methods return instances of * <code>ParserResult</code> when successful, or <code>null</code> when * there's a problem with the input data. */ static final class ParserResult { @@ -96,41 +97,49 @@ info = RuntimeInfo.forRuntime(runtime); } /** * <code>Parser.new(source, opts = {})</code> - * + * * <p>Creates a new <code>JSON::Ext::Parser</code> instance for the string * <code>source</code>. * It will be configured by the <code>opts</code> Hash. 
* <code>opts</code> can have the following keys: - * + * * <dl> * <dt><code>:max_nesting</code> * <dd>The maximum depth of nesting allowed in the parsed data * structures. Disable depth checking with <code>:max_nesting => false|nil|0</code>, * it defaults to 19. - * + * * <dt><code>:allow_nan</code> * <dd>If set to <code>true</code>, allow <code>NaN</code>, * <code>Infinity</code> and <code>-Infinity</code> in defiance of RFC 4627 * to be parsed by the Parser. This option defaults to <code>false</code>. * * <dt><code>:symbolize_names</code> * <dd>If set to <code>true</code>, returns symbols for the names (keys) in * a JSON object. Otherwise strings are returned, which is also the default. + * + * <dt><code>:quirks_mode?</code> + * <dd>If set to <code>true</code>, if the parse is in quirks_mode, false + * otherwise. * * <dt><code>:create_additions</code> * <dd>If set to <code>false</code>, the Parser doesn't create additions * even if a matching class and <code>create_id</code> was found. This option * defaults to <code>true</code>. - * + * * <dt><code>:object_class</code> * <dd>Defaults to Hash. - * + * * <dt><code>:array_class</code> * <dd>Defaults to Array. + * + * <dt><code>:quirks_mode</code> + * <dd>Enables quirks_mode for parser, that is for example parsing single + * JSON values instead of documents is possible.
* </dl> */ @JRubyMethod(name = "new", required = 1, optional = 1, meta = true) public static IRubyObject newInstance(IRubyObject clazz, IRubyObject[] args, Block block) { Parser parser = (Parser)((RubyClass)clazz).allocate(); @@ -140,24 +149,29 @@ return parser; } @JRubyMethod(required = 1, optional = 1, visibility = Visibility.PRIVATE) public IRubyObject initialize(ThreadContext context, IRubyObject[] args) { - Ruby runtime = context.getRuntime(); - RubyString source = convertEncoding(context, args[0].convertToString()); + Ruby runtime = context.getRuntime(); + if (this.vSource != null) { + throw runtime.newTypeError("already initialized instance"); + } OptionsReader opts = new OptionsReader(context, args.length > 1 ? args[1] : null); this.maxNesting = opts.getInt("max_nesting", DEFAULT_MAX_NESTING); this.allowNaN = opts.getBool("allow_nan", false); this.symbolizeNames = opts.getBool("symbolize_names", false); + this.quirksMode = opts.getBool("quirks_mode", false); this.createId = opts.getString("create_id", getCreateId(context)); this.createAdditions = opts.getBool("create_additions", true); this.objectClass = opts.getClass("object_class", runtime.getHash()); this.arrayClass = opts.getClass("array_class", runtime.getArray()); this.match_string = opts.getHash("match_string"); - this.vSource = source; + this.vSource = args[0].convertToString(); + if (!quirksMode) this.vSource = convertEncoding(context, vSource); + return this; } /** * Checks the given string's encoding. 
If a non-UTF-8 encoding is detected, @@ -172,23 +186,23 @@ "A JSON text must at least contain two octets!"); } if (info.encodingsSupported()) { RubyEncoding encoding = (RubyEncoding)source.encoding(context); - if (encoding != info.ascii8bit) { - return (RubyString)source.encode(context, info.utf8); + if (encoding != info.ascii8bit.get()) { + return (RubyString)source.encode(context, info.utf8.get()); } String sniffedEncoding = sniffByteList(bl); if (sniffedEncoding == null) return source; // assume UTF-8 return reinterpretEncoding(context, source, sniffedEncoding); } String sniffedEncoding = sniffByteList(bl); if (sniffedEncoding == null) return source; // assume UTF-8 Ruby runtime = context.getRuntime(); - return (RubyString)info.jsonModule. + return (RubyString)info.jsonModule.get(). callMethod(context, "iconv", new IRubyObject[] { runtime.newString("utf-8"), runtime.newString(sniffedEncoding), source}); @@ -214,50 +228,69 @@ * converts it to UTF-8. */ private RubyString reinterpretEncoding(ThreadContext context, RubyString str, String sniffedEncoding) { RubyEncoding actualEncoding = info.getEncoding(context, sniffedEncoding); - RubyEncoding targetEncoding = info.utf8; + RubyEncoding targetEncoding = info.utf8.get(); RubyString dup = (RubyString)str.dup(); dup.force_encoding(context, actualEncoding); return (RubyString)dup.encode_bang(context, targetEncoding); } /** * <code>Parser#parse()</code> - * + * * <p>Parses the current JSON text <code>source</code> and returns the * complete data structure as a result. */ @JRubyMethod public IRubyObject parse(ThreadContext context) { return new ParserSession(this, context).parse(); } /** * <code>Parser#source()</code> - * + * * <p>Returns a copy of the current <code>source</code> string, that was * used to construct this Parser. 
*/ @JRubyMethod(name = "source") public IRubyObject source_get() { - return vSource.dup(); + return checkAndGetSource().dup(); } /** + * <code>Parser#quirks_mode?()</code> + * + * <p>If set to <code>true</code>, if the parse is in quirks_mode, false + * otherwise. + */ + @JRubyMethod(name = "quirks_mode?") + public IRubyObject quirks_mode_p(ThreadContext context) { + return context.getRuntime().newBoolean(quirksMode); + } + + public RubyString checkAndGetSource() { + if (vSource != null) { + return vSource; + } else { + throw getRuntime().newTypeError("uninitialized instance"); + } + } + + /** * Queries <code>JSON.create_id</code>. Returns <code>null</code> if it is * set to <code>nil</code> or <code>false</code>, and a String if not. */ private RubyString getCreateId(ThreadContext context) { - IRubyObject v = info.jsonModule.callMethod(context, "create_id"); + IRubyObject v = info.jsonModule.get().callMethod(context, "create_id"); return v.isTrue() ? v.convertToString() : null; } /** * A string parsing session. - * + * * <p>Once a ParserSession is instantiated, the source string should not * change until the parsing is complete. The ParserSession object assumes * the source {@link RubyString} is still associated to its original * {@link ByteList}, which in turn must still be bound to the same * <code>byte[]</code> value (and on the same offset). @@ -277,11 +310,11 @@ private static final int EVIL = 0x666; private ParserSession(Parser parser, ThreadContext context) { this.parser = parser; this.context = context; - this.byteList = parser.vSource.getByteList(); + this.byteList = parser.checkAndGetSource().getByteList(); this.data = byteList.unsafeBytes(); this.decoder = new StringDecoder(context); } private RaiseException unexpectedToken(int absStart, int absEnd) { @@ -351,11 +384,11 @@ } else { throw unexpectedToken(p - 7, pe); } } action parse_number { - if (pe > fpc + 9 && + if (pe > fpc + 9 - (parser.quirksMode ? 
1 : 0) && absSubSequence(fpc, fpc + 9).toString().equals(JSON_MINUS_INFINITY)) { if (parser.allowNaN) { result = getConstant(CONST_MINUS_INFINITY); fexec p + 10; @@ -451,11 +484,11 @@ action exit { fhold; fbreak; } - main := '-'? ( '0' | [1-9][0-9]* ) ( ^[0-9] @exit ); + main := '-'? ( '0' | [1-9][0-9]* ) ( ^[0-9]? @exit ); }%% ParserResult parseInteger(int p, int pe) { int cs = EVIL; @@ -487,11 +520,11 @@ } main := '-'? ( ( ( '0' | [1-9][0-9]* ) '.' [0-9]+ ( [Ee] [+\-]?[0-9]+ )? ) | ( ( '0' | [1-9][0-9]* ) ( [Ee] [+\-]? [0-9]+ ) ) ) - ( ^[0-9Ee.\-] @exit ); + ( ^[0-9Ee.\-]? @exit ); }%% ParserResult parseFloat(int p, int pe) { int cs = EVIL; @@ -565,11 +598,11 @@ throw JumpException.SPECIAL_JUMP; } } }); } catch (JumpException e) { } - if (memoArray[1] != null) { + if (memoArray[1] != null) { RubyClass klass = (RubyClass) memoArray[1]; if (klass.respondsTo("json_creatable?") && klass.callMethod(context, "json_creatable?").isTrue()) { result = klass.callMethod(context, "json_create", result); } @@ -688,19 +721,18 @@ action exit { fhold; fbreak; } + + pair = ignore* begin_name >parse_name ignore* name_separator + ignore* begin_value >parse_value; + next_pair = ignore* value_separator pair; - a_pair = ignore* - begin_name >parse_name - ignore* name_separator ignore* - begin_value >parse_value; - - main := begin_object - (a_pair (ignore* value_separator a_pair)*)? - ignore* end_object @exit; + main := ( + begin_object (pair (next_pair)*)? ignore* end_object + ) @exit; }%% ParserResult parseObject(int p, int pe) { int cs = EVIL; IRubyObject lastName = null; @@ -728,11 +760,11 @@ // attempt to de-serialize object if (parser.createAdditions) { IRubyObject vKlassName = result.op_aref(context, parser.createId); if (!vKlassName.isNil()) { // might throw ArgumentError, we let it propagate - IRubyObject klass = parser.info.jsonModule. + IRubyObject klass = parser.info.jsonModule.get(). 
callMethod(context, "deep_const_get", vKlassName); if (klass.respondsTo("json_creatable?") && klass.callMethod(context, "json_creatable?").isTrue()) { returnedResult = klass.callMethod(context, "json_create", result); @@ -776,11 +808,11 @@ ( begin_object >parse_object | begin_array >parse_array ) ignore*; }%% - public IRubyObject parse() { + public IRubyObject parseStrict() { int cs = EVIL; int p, pe; IRubyObject result = null; %% write init; @@ -793,10 +825,58 @@ } else { throw unexpectedToken(p, pe); } } + %%{ + machine JSON_quirks_mode; + include JSON_common; + + write data; + + action parse_value { + ParserResult res = parseValue(fpc, pe); + if (res == null) { + fhold; + fbreak; + } else { + result = res.result; + fexec res.p; + } + } + + main := ignore* + ( begin_value >parse_value) + ignore*; + }%% + + public IRubyObject parseQuirksMode() { + int cs = EVIL; + int p, pe; + IRubyObject result = null; + + %% write init; + p = byteList.begin(); + pe = p + byteList.length(); + %% write exec; + + if (cs >= JSON_quirks_mode_first_final && p == pe) { + return result; + } else { + throw unexpectedToken(p, pe); + } + } + + public IRubyObject parse() { + if (parser.quirksMode) { + return parseQuirksMode(); + } else { + return parseStrict(); + } + + } + /** * Returns a subsequence of the source ByteList, based on source * array byte offsets (i.e., the ByteList's own begin offset is not * automatically added). * @param start @@ -811,10 +891,10 @@ /** * Retrieves a constant directly descended from the <code>JSON</code> module. * @param name The constant name */ private IRubyObject getConstant(String name) { - return parser.info.jsonModule.getConstant(name); + return parser.info.jsonModule.get().getConstant(name); } private RaiseException newException(String className, String message) { return Utils.newException(context, className, message); }