#include
VALUE cNokogiriHtml4SaxParserContext ;
static void
deallocate(xmlParserCtxtPtr ctxt)
{
NOKOGIRI_DEBUG_START(ctxt);
ctxt->sax = NULL;
htmlFreeParserCtxt(ctxt);
NOKOGIRI_DEBUG_END(ctxt);
}
static VALUE
parse_memory(VALUE klass, VALUE data, VALUE encoding)
{
htmlParserCtxtPtr ctxt;
Check_Type(data, T_STRING);
if (!(int)RSTRING_LEN(data)) {
rb_raise(rb_eRuntimeError, "data cannot be empty");
}
ctxt = htmlCreateMemoryParserCtxt(StringValuePtr(data),
(int)RSTRING_LEN(data));
if (ctxt->sax) {
xmlFree(ctxt->sax);
ctxt->sax = NULL;
}
if (RTEST(encoding)) {
xmlCharEncodingHandlerPtr enc = xmlFindCharEncodingHandler(StringValueCStr(encoding));
if (enc != NULL) {
xmlSwitchToEncoding(ctxt, enc);
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
rb_raise(rb_eRuntimeError, "Unsupported encoding %s",
StringValueCStr(encoding));
}
}
}
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
}
static VALUE
parse_file(VALUE klass, VALUE filename, VALUE encoding)
{
htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt(
StringValueCStr(filename),
StringValueCStr(encoding)
);
return Data_Wrap_Struct(klass, NULL, deallocate, ctxt);
}
static VALUE
parse_doc(VALUE ctxt_val)
{
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
htmlParseDocument(ctxt);
return Qnil;
}
static VALUE
parse_doc_finalize(VALUE ctxt_val)
{
htmlParserCtxtPtr ctxt = (htmlParserCtxtPtr)ctxt_val;
if (ctxt->myDoc) {
xmlFreeDoc(ctxt->myDoc);
}
NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData);
return Qnil;
}
static VALUE
parse_with(VALUE self, VALUE sax_handler)
{
htmlParserCtxtPtr ctxt;
htmlSAXHandlerPtr sax;
if (!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) {
rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser");
}
Data_Get_Struct(self, htmlParserCtxt, ctxt);
Data_Get_Struct(sax_handler, htmlSAXHandler, sax);
/* Free the sax handler since we'll assign our own */
if (ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) {
xmlFree(ctxt->sax);
}
ctxt->sax = sax;
ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler);
xmlSetStructuredErrorFunc(NULL, NULL);
rb_ensure(parse_doc, (VALUE)ctxt, parse_doc_finalize, (VALUE)ctxt);
return self;
}
void
noko_init_html_sax_parser_context()
{
assert(cNokogiriXmlSaxParserContext);
cNokogiriHtml4SaxParserContext = rb_define_class_under(mNokogiriHtml4Sax, "ParserContext",
cNokogiriXmlSaxParserContext);
rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "memory", parse_memory, 2);
rb_define_singleton_method(cNokogiriHtml4SaxParserContext, "file", parse_file, 2);
rb_define_method(cNokogiriHtml4SaxParserContext, "parse_with", parse_with, 1);
}