ext/nokogumbo/nokogumbo.c in nokogumbo-2.0.2 vs ext/nokogumbo/nokogumbo.c in nokogumbo-2.0.3
- old
+ new
@@ -279,10 +279,11 @@
const char *status_string = gumbo_status_to_string(output->status);
switch (output->status) {
case GUMBO_STATUS_OK:
break;
+ case GUMBO_STATUS_TOO_MANY_ATTRIBUTES:
case GUMBO_STATUS_TREE_TOO_DEEP:
gumbo_destroy_output(output);
rb_raise(rb_eArgError, "%s", status_string);
case GUMBO_STATUS_OUT_OF_MEMORY:
gumbo_destroy_output(output);
@@ -477,37 +478,64 @@
VALUE input;
VALUE url_or_frag;
xmlDocPtr doc;
} ParseArgs;
-static VALUE parse_cleanup(ParseArgs *args) {
+static void parse_args_mark(void *parse_args) {
+ ParseArgs *args = parse_args;
+ rb_gc_mark_maybe(args->input);
+ rb_gc_mark_maybe(args->url_or_frag);
+}
+
+// Wrap a ParseArgs pointer. The underlying ParseArgs must outlive the
+// wrapper.
+static VALUE wrap_parse_args(ParseArgs *args) {
+ return Data_Wrap_Struct(rb_cData, parse_args_mark, RUBY_NEVER_FREE, args);
+}
+
+ // Returns the underlying ParseArgs wrapped by wrap_parse_args.
+static ParseArgs *unwrap_parse_args(VALUE obj) {
+ ParseArgs *args;
+ Data_Get_Struct(obj, ParseArgs, args);
+ return args;
+}
+
+static VALUE parse_cleanup(VALUE parse_args) {
+ ParseArgs *args = unwrap_parse_args(parse_args);
gumbo_destroy_output(args->output);
+ // Make sure garbage collection doesn't mark the objects as being live based
+ // on references from the ParseArgs. This may be unnecessary.
+ args->input = Qnil;
+ args->url_or_frag = Qnil;
if (args->doc != NIL)
xmlFreeDoc(args->doc);
return Qnil;
}
+static VALUE parse_continue(VALUE parse_args);
-static VALUE parse_continue(ParseArgs *args);
-
// Parse a string using gumbo_parse into a Nokogiri document
-static VALUE parse(VALUE self, VALUE input, VALUE url, VALUE max_errors, VALUE max_depth) {
+static VALUE parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth) {
GumboOptions options = kGumboDefaultOptions;
+ options.max_attributes = NUM2INT(max_attributes);
options.max_errors = NUM2INT(max_errors);
options.max_tree_depth = NUM2INT(max_depth);
GumboOutput *output = perform_parse(&options, input);
ParseArgs args = {
.output = output,
.input = input,
.url_or_frag = url,
.doc = NIL,
};
- return rb_ensure(parse_continue, (VALUE)&args, parse_cleanup, (VALUE)&args);
+ VALUE parse_args = wrap_parse_args(&args);
+
+ return rb_ensure(parse_continue, parse_args, parse_cleanup, parse_args);
}
-static VALUE parse_continue(ParseArgs *args) {
+static VALUE parse_continue(VALUE parse_args) {
+ ParseArgs *args = unwrap_parse_args(parse_args);
GumboOutput *output = args->output;
xmlDocPtr doc;
if (output->document->v.document.has_doctype) {
const char *name = output->document->v.document.name;
const char *public = output->document->v.document.public_identifier;
@@ -561,17 +589,18 @@
#else
return node;
#endif
}
-static VALUE fragment_continue(ParseArgs *args);
+static VALUE fragment_continue(VALUE parse_args);
static VALUE fragment (
VALUE self,
VALUE doc_fragment,
VALUE tags,
VALUE ctx,
+ VALUE max_attributes,
VALUE max_errors,
VALUE max_depth
) {
ID name = rb_intern_const("name");
const char *ctx_tag;
@@ -674,10 +703,11 @@
}
// Perform a fragment parse.
int depth = NUM2INT(max_depth);
GumboOptions options = kGumboDefaultOptions;
+ options.max_attributes = NUM2INT(max_attributes);
options.max_errors = NUM2INT(max_errors);
// Add one to account for the HTML element.
options.max_tree_depth = depth < 0 ? -1 : (depth + 1);
options.fragment_context = ctx_tag;
options.fragment_namespace = ctx_ns;
@@ -690,15 +720,17 @@
.output = output,
.input = tags,
.url_or_frag = doc_fragment,
.doc = (xmlDocPtr)extract_xml_node(doc),
};
- rb_ensure(fragment_continue, (VALUE)&args, parse_cleanup, (VALUE)&args);
+ VALUE parse_args = wrap_parse_args(&args);
+ rb_ensure(fragment_continue, parse_args, parse_cleanup, parse_args);
return Qnil;
}
-static VALUE fragment_continue(ParseArgs *args) {
+static VALUE fragment_continue(VALUE parse_args) {
+ ParseArgs *args = unwrap_parse_args(parse_args);
GumboOutput *output = args->output;
VALUE doc_fragment = args->url_or_frag;
xmlDocPtr xml_doc = args->doc;
args->doc = NIL; // The Ruby runtime owns doc so make sure we don't delete it.
@@ -718,14 +750,19 @@
#if !NGLIB
// Class constants.
VALUE mNokogiri = rb_const_get(rb_cObject, rb_intern_const("Nokogiri"));
VALUE mNokogiriXml = rb_const_get(mNokogiri, rb_intern_const("XML"));
cNokogiriXmlSyntaxError = rb_const_get(mNokogiriXml, rb_intern_const("SyntaxError"));
+ rb_gc_register_mark_object(cNokogiriXmlSyntaxError);
cNokogiriXmlElement = rb_const_get(mNokogiriXml, rb_intern_const("Element"));
+ rb_gc_register_mark_object(cNokogiriXmlElement);
cNokogiriXmlText = rb_const_get(mNokogiriXml, rb_intern_const("Text"));
+ rb_gc_register_mark_object(cNokogiriXmlText);
cNokogiriXmlCData = rb_const_get(mNokogiriXml, rb_intern_const("CDATA"));
+ rb_gc_register_mark_object(cNokogiriXmlCData);
cNokogiriXmlComment = rb_const_get(mNokogiriXml, rb_intern_const("Comment"));
+ rb_gc_register_mark_object(cNokogiriXmlComment);
// Interned symbols.
new = rb_intern_const("new");
node_name_ = rb_intern_const("node_name=");
@@ -734,18 +771,19 @@
#endif
// Class constants.
VALUE HTML5 = rb_const_get(mNokogiri, rb_intern_const("HTML5"));
Document = rb_const_get(HTML5, rb_intern_const("Document"));
+ rb_gc_register_mark_object(Document);
// Interned symbols.
internal_subset = rb_intern_const("internal_subset");
parent = rb_intern_const("parent");
// Define Nokogumbo module with parse and fragment methods.
VALUE Gumbo = rb_define_module("Nokogumbo");
- rb_define_singleton_method(Gumbo, "parse", parse, 4);
- rb_define_singleton_method(Gumbo, "fragment", fragment, 5);
+ rb_define_singleton_method(Gumbo, "parse", parse, 5);
+ rb_define_singleton_method(Gumbo, "fragment", fragment, 6);
// Add private constant for testing.
rb_define_const(Gumbo, "LINE_SUPPORTED", line_supported);
rb_funcall(Gumbo, rb_intern_const("private_constant"), 1,
rb_utf8_str_new_cstr("LINE_SUPPORTED"));