#include
/*
 * call-seq:
 *  new(uri = nil, external_id = nil, *rest)
 *
 * Create a new, empty HTML document. The first two optional arguments are
 * passed to htmlNewDoc as the URI and external ID (nil or false is passed
 * as NULL). Every argument that was given is then forwarded unchanged to
 * the new object's +initialize+ method.
 */
static VALUE new(int argc, VALUE *argv, VALUE klass)
{
  VALUE args;
  VALUE uri;
  VALUE external_id;
  VALUE wrapped;
  const xmlChar *c_uri = NULL;
  const xmlChar *c_external_id = NULL;
  htmlDocPtr html_doc;

  /* Collect all arguments into one array; none of them is mandatory. */
  rb_scan_args(argc, argv, "0*", &args);
  uri = rb_ary_entry(args, 0);
  external_id = rb_ary_entry(args, 1);

  if (RTEST(uri)) {
    c_uri = (const xmlChar *)StringValuePtr(uri);
  }
  if (RTEST(external_id)) {
    c_external_id = (const xmlChar *)StringValuePtr(external_id);
  }

  html_doc = htmlNewDoc(c_uri, c_external_id);
  wrapped = Nokogiri_wrap_xml_document(klass, html_doc);

  /* Run the Ruby-level initializer with the caller's original arguments. */
  rb_funcall2(wrapped, rb_intern("initialize"), argc, argv);

  return wrapped;
}
/*
 * call-seq:
 *  read_io(io, url, encoding, options)
 *
 * Read the HTML document from +io+ with given +url+, +encoding+,
 * and +options+. See Nokogiri::HTML.parse
 *
 * +url+ and +encoding+ may be nil, in which case NULL is passed to
 * htmlReadIO. +options+ is converted to a C int.
 *
 * Returns the wrapped document; the errors gathered while parsing are
 * assigned to it through +errors=+. When htmlReadIO yields no document,
 * raises the wrapped libxml2 last error if there is one, otherwise a
 * RuntimeError.
 */
static VALUE read_io( VALUE klass,
                      VALUE io,
                      VALUE url,
                      VALUE encoding,
                      VALUE options )
{
  const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
  const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
  /*
   * Convert the options before the error handler is installed: NUM2INT can
   * raise, and an exception raised while the handler is active would leave
   * libxml2 holding a pointer to error_list after this frame has unwound.
   */
  int c_options = NUM2INT(options);
  VALUE error_list = rb_ary_new();
  VALUE document;
  htmlDocPtr doc;

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = htmlReadIO(
      io_read_callback,
      io_close_callback,
      (void *)io,
      c_url,
      c_enc,
      c_options
  );

  /* Always detach the handler again before doing anything that may raise. */
  xmlSetStructuredErrorFunc(NULL, NULL);

  if (doc == NULL) {
    xmlErrorPtr error = xmlGetLastError();

    if (error) {
      rb_funcall(rb_mKernel, rb_intern("raise"), 1,
          Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
      );
    } else {
      rb_raise(rb_eRuntimeError, "Could not parse document");
    }

    return Qnil;
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_funcall(document, rb_intern("errors="), 1, error_list);
  return document;
}
/*
 * call-seq:
 *  read_memory(string, url, encoding, options)
 *
 * Read the HTML document contained in +string+ with given +url+, +encoding+,
 * and +options+. See Nokogiri::HTML.parse
 *
 * +url+ and +encoding+ may be nil, in which case NULL is passed to
 * htmlReadMemory. +options+ is converted to a C int.
 *
 * Returns the wrapped document; the errors gathered while parsing are
 * assigned to it through +errors=+. Raises a RuntimeError when the byte
 * length of +string+ does not fit in a C int. When htmlReadMemory yields no
 * document, raises the wrapped libxml2 last error if there is one, otherwise
 * a RuntimeError.
 */
static VALUE read_memory( VALUE klass,
                          VALUE string,
                          VALUE url,
                          VALUE encoding,
                          VALUE options )
{
  const char * c_buffer = StringValuePtr(string);
  const char * c_url = (url == Qnil) ? NULL : StringValuePtr(url);
  const char * c_enc = (encoding == Qnil) ? NULL : StringValuePtr(encoding);
  /*
   * Convert the options before the error handler is installed: NUM2INT can
   * raise, and an exception raised while the handler is active would leave
   * libxml2 holding a pointer to error_list after this frame has unwound.
   */
  int c_options = NUM2INT(options);
  long byte_len = RSTRING_LEN(string);
  int len = (int)byte_len;
  VALUE error_list;
  VALUE document;
  htmlDocPtr doc;

  /* htmlReadMemory takes an int size; refuse input that would be truncated. */
  if ((long)len != byte_len) {
    rb_raise(rb_eRuntimeError, "Could not parse document: input is too large");
  }

  error_list = rb_ary_new();

  xmlResetLastError();
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = htmlReadMemory(c_buffer, len, c_url, c_enc, c_options);

  /* Always detach the handler again before doing anything that may raise. */
  xmlSetStructuredErrorFunc(NULL, NULL);

  if (doc == NULL) {
    xmlErrorPtr error = xmlGetLastError();

    if (error) {
      rb_funcall(rb_mKernel, rb_intern("raise"), 1,
          Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error)
      );
    } else {
      rb_raise(rb_eRuntimeError, "Could not parse document");
    }

    return Qnil;
  }

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_funcall(document, rb_intern("errors="), 1, error_list);
  return document;
}
/*
 * call-seq:
 *  type
 *
 * Return, as an Integer, the value of the +type+ field of the underlying
 * document structure.
 */
static VALUE type(VALUE self)
{
  htmlDocPtr html_doc;
  int doc_type;

  Data_Get_Struct(self, xmlDoc, html_doc);
  doc_type = (int)html_doc->type;

  return INT2NUM(doc_type);
}
/*
 * call-seq:
 *  meta_encoding=(encoding)
 *
 * Set the meta tag encoding for this document to +encoding+ and return
 * +encoding+.
 */
static VALUE set_meta_encoding(VALUE self, VALUE encoding)
{
  htmlDocPtr html_doc;
  const xmlChar *c_encoding;

  Data_Get_Struct(self, xmlDoc, html_doc);
  c_encoding = (const xmlChar *)StringValuePtr(encoding);

  /* The result of htmlSetMetaEncoding is not inspected. */
  htmlSetMetaEncoding(html_doc, c_encoding);

  return encoding;
}
/*
 * call-seq:
 *  meta_encoding
 *
 * Get the meta tag encoding for this document, as a string built by
 * NOKOGIRI_STR_NEW2 from the value htmlGetMetaEncoding reports and the
 * document's own +encoding+ field.
 */
static VALUE meta_encoding(VALUE self)
{
  htmlDocPtr html_doc;

  Data_Get_Struct(self, xmlDoc, html_doc);

  /*
   * NOTE(review): htmlGetMetaEncoding may return NULL when the document has
   * no meta encoding; confirm NOKOGIRI_STR_NEW2 copes with a NULL string.
   */
  return NOKOGIRI_STR_NEW2(htmlGetMetaEncoding(html_doc), html_doc->encoding);
}
/* Holds the Nokogiri::HTML::Document class once init_html_document has run. */
VALUE cNokogiriHtmlDocument;

/*
 * Define Nokogiri::HTML::Document (a subclass of Nokogiri::XML::Document,
 * itself defined under Nokogiri::XML::Node) and register the functions of
 * this file as its singleton and instance methods.
 */
void init_html_document()
{
  /* Namespaces. */
  VALUE mNokogiri = rb_define_module("Nokogiri");
  VALUE mHtml     = rb_define_module_under(mNokogiri, "HTML");
  VALUE mXml      = rb_define_module_under(mNokogiri, "XML");

  /* Class chain: XML::Node -> XML::Document -> HTML::Document. */
  VALUE cXmlNode      = rb_define_class_under(mXml, "Node", rb_cObject);
  VALUE cXmlDocument  = rb_define_class_under(mXml, "Document", cXmlNode);
  VALUE cHtmlDocument = rb_define_class_under(mHtml, "Document", cXmlDocument);

  cNokogiriHtmlDocument = cHtmlDocument;

  /* Class-level constructors and readers. */
  rb_define_singleton_method(cHtmlDocument, "new", new, -1);
  rb_define_singleton_method(cHtmlDocument, "read_io", read_io, 4);
  rb_define_singleton_method(cHtmlDocument, "read_memory", read_memory, 4);

  /* Instance methods. */
  rb_define_method(cHtmlDocument, "type", type, 0);
  rb_define_method(cHtmlDocument, "meta_encoding=", set_meta_encoding, 1);
  rb_define_method(cHtmlDocument, "meta_encoding", meta_encoding, 0);
}