#include VALUE cNokogiriHtmlSaxParserContext ; static void deallocate(xmlParserCtxtPtr ctxt) { NOKOGIRI_DEBUG_START(handler); ctxt->sax = NULL; htmlFreeParserCtxt(ctxt); NOKOGIRI_DEBUG_END(handler); } static VALUE parse_memory(VALUE klass, VALUE data, VALUE encoding) { if(NIL_P(data)) rb_raise(rb_eArgError, "data cannot be nil"); if(!(int)RSTRING_LEN(data)) rb_raise(rb_eRuntimeError, "data cannot be empty"); htmlParserCtxtPtr ctxt = htmlCreateMemoryParserCtxt( StringValuePtr(data), (int)RSTRING_LEN(data) ); if(RTEST(encoding)) { xmlCharEncoding enc = xmlParseCharEncoding(StringValuePtr(encoding)); if(enc != XML_CHAR_ENCODING_ERROR) { xmlSwitchEncoding(ctxt, enc); if(ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) { rb_raise(rb_eRuntimeError, "Unsupported encoding %s", StringValuePtr(encoding)); } } } return Data_Wrap_Struct(klass, NULL, deallocate, ctxt); } static VALUE parse_file(VALUE klass, VALUE filename, VALUE encoding) { htmlParserCtxtPtr ctxt = htmlCreateFileParserCtxt( StringValuePtr(filename), StringValuePtr(encoding) ); return Data_Wrap_Struct(klass, NULL, deallocate, ctxt); } static VALUE parse_with(VALUE self, VALUE sax_handler) { if(!rb_obj_is_kind_of(sax_handler, cNokogiriXmlSaxParser)) rb_raise(rb_eArgError, "argument must be a Nokogiri::XML::SAX::Parser"); htmlParserCtxtPtr ctxt; Data_Get_Struct(self, htmlParserCtxt, ctxt); htmlSAXHandlerPtr sax; Data_Get_Struct(sax_handler, htmlSAXHandler, sax); // Free the sax handler since we'll assign our own if(ctxt->sax && ctxt->sax != (xmlSAXHandlerPtr)&xmlDefaultSAXHandler) xmlFree(ctxt->sax); ctxt->sax = sax; ctxt->userData = (void *)NOKOGIRI_SAX_TUPLE_NEW(ctxt, sax_handler); htmlParseDocument(ctxt); if(NULL != ctxt->myDoc) xmlFreeDoc(ctxt->myDoc); NOKOGIRI_SAX_TUPLE_DESTROY(ctxt->userData); return self; } void init_html_sax_parser_context() { VALUE nokogiri = rb_define_module("Nokogiri"); VALUE xml = rb_define_module_under(nokogiri, "XML"); VALUE html = rb_define_module_under(nokogiri, "HTML"); VALUE sax = rb_define_module_under(xml, "SAX"); VALUE hsax = rb_define_module_under(html, "SAX"); VALUE pc = rb_define_class_under(sax, "ParserContext", rb_cObject); VALUE klass = rb_define_class_under(hsax, "ParserContext", pc); cNokogiriHtmlSaxParserContext = klass; rb_define_singleton_method(klass, "memory", parse_memory, 2); rb_define_singleton_method(klass, "file", parse_file, 2); rb_define_method(klass, "parse_with", parse_with, 1); }