ext/nokogumbo/nokogumbo.c in nokogumbo-2.0.2 vs ext/nokogumbo/nokogumbo.c in nokogumbo-2.0.3

- old
+ new

@@ -279,10 +279,11 @@ const char *status_string = gumbo_status_to_string(output->status); switch (output->status) { case GUMBO_STATUS_OK: break; + case GUMBO_STATUS_TOO_MANY_ATTRIBUTES: case GUMBO_STATUS_TREE_TOO_DEEP: gumbo_destroy_output(output); rb_raise(rb_eArgError, "%s", status_string); case GUMBO_STATUS_OUT_OF_MEMORY: gumbo_destroy_output(output); @@ -477,37 +478,64 @@ VALUE input; VALUE url_or_frag; xmlDocPtr doc; } ParseArgs; -static VALUE parse_cleanup(ParseArgs *args) { +static void parse_args_mark(void *parse_args) { + ParseArgs *args = parse_args; + rb_gc_mark_maybe(args->input); + rb_gc_mark_maybe(args->url_or_frag); +} + +// Wrap a ParseArgs pointer. The underlying ParseArgs must outlive the +// wrapper. +static VALUE wrap_parse_args(ParseArgs *args) { + return Data_Wrap_Struct(rb_cData, parse_args_mark, RUBY_NEVER_FREE, args); +} + +// Returns the underlying ParseArgs wrapped by wrap_parse_args. +static ParseArgs *unwrap_parse_args(VALUE obj) { + ParseArgs *args; + Data_Get_Struct(obj, ParseArgs, args); + return args; +} + +static VALUE parse_cleanup(VALUE parse_args) { + ParseArgs *args = unwrap_parse_args(parse_args); gumbo_destroy_output(args->output); + // Make sure garbage collection doesn't mark the objects as being live based + // on references from the ParseArgs. This may be unnecessary. 
+ args->input = Qnil; + args->url_or_frag = Qnil; if (args->doc != NIL) xmlFreeDoc(args->doc); return Qnil; } +static VALUE parse_continue(VALUE parse_args); -static VALUE parse_continue(ParseArgs *args); - // Parse a string using gumbo_parse into a Nokogiri document -static VALUE parse(VALUE self, VALUE input, VALUE url, VALUE max_errors, VALUE max_depth) { +static VALUE parse(VALUE self, VALUE input, VALUE url, VALUE max_attributes, VALUE max_errors, VALUE max_depth) { GumboOptions options = kGumboDefaultOptions; + options.max_attributes = NUM2INT(max_attributes); options.max_errors = NUM2INT(max_errors); options.max_tree_depth = NUM2INT(max_depth); GumboOutput *output = perform_parse(&options, input); ParseArgs args = { .output = output, .input = input, .url_or_frag = url, .doc = NIL, }; - return rb_ensure(parse_continue, (VALUE)&args, parse_cleanup, (VALUE)&args); + VALUE parse_args = wrap_parse_args(&args); + + return rb_ensure(parse_continue, parse_args, parse_cleanup, parse_args); } -static VALUE parse_continue(ParseArgs *args) { +static VALUE parse_continue(VALUE parse_args) { + ParseArgs *args = unwrap_parse_args(parse_args); GumboOutput *output = args->output; xmlDocPtr doc; if (output->document->v.document.has_doctype) { const char *name = output->document->v.document.name; const char *public = output->document->v.document.public_identifier; @@ -561,17 +589,18 @@ #else return node; #endif } -static VALUE fragment_continue(ParseArgs *args); +static VALUE fragment_continue(VALUE parse_args); static VALUE fragment ( VALUE self, VALUE doc_fragment, VALUE tags, VALUE ctx, + VALUE max_attributes, VALUE max_errors, VALUE max_depth ) { ID name = rb_intern_const("name"); const char *ctx_tag; @@ -674,10 +703,11 @@ } // Perform a fragment parse. int depth = NUM2INT(max_depth); GumboOptions options = kGumboDefaultOptions; + options.max_attributes = NUM2INT(max_attributes); options.max_errors = NUM2INT(max_errors); // Add one to account for the HTML element. 
options.max_tree_depth = depth < 0 ? -1 : (depth + 1); options.fragment_context = ctx_tag; options.fragment_namespace = ctx_ns; @@ -690,15 +720,17 @@ .output = output, .input = tags, .url_or_frag = doc_fragment, .doc = (xmlDocPtr)extract_xml_node(doc), }; - rb_ensure(fragment_continue, (VALUE)&args, parse_cleanup, (VALUE)&args); + VALUE parse_args = wrap_parse_args(&args); + rb_ensure(fragment_continue, parse_args, parse_cleanup, parse_args); return Qnil; } -static VALUE fragment_continue(ParseArgs *args) { +static VALUE fragment_continue(VALUE parse_args) { + ParseArgs *args = unwrap_parse_args(parse_args); GumboOutput *output = args->output; VALUE doc_fragment = args->url_or_frag; xmlDocPtr xml_doc = args->doc; args->doc = NIL; // The Ruby runtime owns doc so make sure we don't delete it. @@ -718,14 +750,19 @@ #if !NGLIB // Class constants. VALUE mNokogiri = rb_const_get(rb_cObject, rb_intern_const("Nokogiri")); VALUE mNokogiriXml = rb_const_get(mNokogiri, rb_intern_const("XML")); cNokogiriXmlSyntaxError = rb_const_get(mNokogiriXml, rb_intern_const("SyntaxError")); + rb_gc_register_mark_object(cNokogiriXmlSyntaxError); cNokogiriXmlElement = rb_const_get(mNokogiriXml, rb_intern_const("Element")); + rb_gc_register_mark_object(cNokogiriXmlElement); cNokogiriXmlText = rb_const_get(mNokogiriXml, rb_intern_const("Text")); + rb_gc_register_mark_object(cNokogiriXmlText); cNokogiriXmlCData = rb_const_get(mNokogiriXml, rb_intern_const("CDATA")); + rb_gc_register_mark_object(cNokogiriXmlCData); cNokogiriXmlComment = rb_const_get(mNokogiriXml, rb_intern_const("Comment")); + rb_gc_register_mark_object(cNokogiriXmlComment); // Interned symbols. new = rb_intern_const("new"); node_name_ = rb_intern_const("node_name="); @@ -734,18 +771,19 @@ #endif // Class constants. VALUE HTML5 = rb_const_get(mNokogiri, rb_intern_const("HTML5")); Document = rb_const_get(HTML5, rb_intern_const("Document")); + rb_gc_register_mark_object(Document); // Interned symbols. 
internal_subset = rb_intern_const("internal_subset"); parent = rb_intern_const("parent"); // Define Nokogumbo module with parse and fragment methods. VALUE Gumbo = rb_define_module("Nokogumbo"); - rb_define_singleton_method(Gumbo, "parse", parse, 4); - rb_define_singleton_method(Gumbo, "fragment", fragment, 5); + rb_define_singleton_method(Gumbo, "parse", parse, 5); + rb_define_singleton_method(Gumbo, "fragment", fragment, 6); // Add private constant for testing. rb_define_const(Gumbo, "LINE_SUPPORTED", line_supported); rb_funcall(Gumbo, rb_intern_const("private_constant"), 1, rb_utf8_str_new_cstr("LINE_SUPPORTED"));