#include <xml_document.h>

/*
 * st_foreach callback run over each entry of a document's unlinked-node
 * table (it is passed to st_foreach from dealloc). In the lines visible
 * here, a node that has no parent is attached to the document with
 * xmlAddChild, and the callback always returns ST_CONTINUE. The key
 * argument is not used by the visible code.
 *
 * NOTE(review): the function braces and the case/default labels of the
 * switch on node->type are not present in this view; lines appear to be
 * missing. Confirm against the complete file.
 */
static int dealloc_node_i(xmlNodePtr key, xmlNodePtr node, xmlDocPtr doc)
  switch(node->type) {
    /* Parentless node: hang it off the document. Presumably so that it is
     * released together with the document -- TODO confirm. */
    if(node->parent == NULL) {
      xmlAddChild((xmlNodePtr)doc, node);
  /* Keep iterating over the remaining table entries. */
  return ST_CONTINUE;

/*
 * Teardown routine for an xmlDoc. The visible lines call
 * xmlDeregisterNodeDefault(NULL) and keep its return value in func, run
 * dealloc_node_i over the document's unlinked-node table, and clear
 * doc->_private.
 *
 * NOTE(review): no visible line frees node_hash, the _private data or the
 * document itself, and func is never used again here. Those steps are
 * presumably on lines missing from this view -- confirm before editing.
 */
static void dealloc(xmlDocPtr doc)
  xmlDeregisterNodeFunc func;
  st_table *node_hash;

  /* Passing NULL installs no deregistration hook; the returned value
   * (presumably the previous hook) is held in func. */
  func = xmlDeregisterNodeDefault(NULL);

  node_hash  = DOC_UNLINKED_NODE_HASH(doc);

  /* Visit every unlinked node, handing the document through as the
   * callback argument. */
  st_foreach(node_hash, dealloc_node_i, (st_data_t)doc);

  /* Drop the document's pointer to its private data. */
  doc->_private = NULL;


/*
 * Strips namespace information from node: clears the node's own namespace,
 * clears its namespace-definition list when the node is an element or an
 * XInclude start/end marker, and clears the namespace pointer of every
 * attribute of an element node.
 *
 * NOTE(review): the body of the loop over node->children is not visible in
 * this view; going by the function name it presumably recurses into each
 * child -- confirm. Function and block braces are also missing here.
 */
static void recursively_remove_namespaces_from_node(xmlNodePtr node)
  xmlNodePtr child ;
  xmlAttrPtr property ;

  /* Detach the node from whatever namespace it currently has. */
  xmlSetNs(node, NULL);

  /* Walk the direct children (loop body not visible here). */
  for (child = node->children ; child ; child = child->next)

  /* Only these node types are checked for namespace definitions. */
  if (((node->type == XML_ELEMENT_NODE) ||
       (node->type == XML_XINCLUDE_START) ||
       (node->type == XML_XINCLUDE_END)) &&
      node->nsDef) {
    /* NOTE(review): no visible call frees the nsDef list before the
     * pointer is cleared -- confirm it is released on a missing line. */
    node->nsDef = NULL;

  /* Attributes carry their own namespace pointer; clear each one. */
  if (node->type == XML_ELEMENT_NODE && node->properties != NULL) {
    property = node->properties ;
    while (property != NULL) {
      if (property->ns) property->ns = NULL ;
      property = property->next ;

 * call-seq:
 *  url
 * Get the url name for this document.
static VALUE url(VALUE self)
  xmlDocPtr doc;
  Data_Get_Struct(self, xmlDoc, doc);

  if(doc->URL) return NOKOGIRI_STR_NEW2(doc->URL);

  return Qnil;

/*
 * call-seq:
 *  root=
 *
 * Set the root element on this document
 */
/*
 * Implementation of Document#root=. Returns the value it was given.
 *
 * NOTE(review): braces and the body of the nil branch are not present in
 * this view; lines appear to be missing. Confirm against the complete file.
 */
static VALUE set_root(VALUE self, VALUE root)
  xmlDocPtr doc;
  xmlNodePtr new_root;
  xmlNodePtr old_root;

  Data_Get_Struct(self, xmlDoc, doc);

  old_root = NULL;

  /* Assigning nil: look up the current root. What is done with it is not
   * visible here (presumably it is detached from the document -- confirm). */
  if(NIL_P(root)) {
    old_root = xmlDocGetRootElement(doc);

    if(old_root) {

    return root;

  Data_Get_Struct(root, xmlNode, new_root);

  /* If the new root's document is not the same as the current document,
   * then we need to dup the node in to this document. */
  if(new_root->doc != doc) {
    /* Remember the current root so it can be handed to nokogiri_root_node
     * after the new root is installed. */
    old_root = xmlDocGetRootElement(doc);
    /* Third argument 1 requests a recursive copy; a NULL result is
     * reported as a RuntimeError. */
    if (!(new_root = xmlDocCopyNode(new_root, doc, 1))) {
      rb_raise(rb_eRuntimeError, "Could not reparent node (xmlDocCopyNode)");

  xmlDocSetRootElement(doc, new_root);
  /* old_root is only non-NULL here when the new root came from another
   * document (see the branch above). */
  if(old_root) nokogiri_root_node(old_root);
  return root;

 * call-seq:
 *  root
 * Get the root node for this document.
static VALUE root(VALUE self)
  xmlDocPtr doc;
  xmlNodePtr root;

  Data_Get_Struct(self, xmlDoc, doc);

  root = xmlDocGetRootElement(doc);

  if(!root) return Qnil;
  return Nokogiri_wrap_xml_node(Qnil, root) ;

 * call-seq:
 *  encoding= encoding
 * Set the encoding string for this Document
static VALUE set_encoding(VALUE self, VALUE encoding)
  xmlDocPtr doc;
  Data_Get_Struct(self, xmlDoc, doc);

  if (doc->encoding)
      free((char *) doc->encoding); // this may produce a gcc cast warning

  doc->encoding = xmlStrdup((xmlChar *)StringValuePtr(encoding));

  return encoding;

 * call-seq:
 *  encoding
 * Get the encoding for this Document
static VALUE encoding(VALUE self)
  xmlDocPtr doc;
  Data_Get_Struct(self, xmlDoc, doc);

  if(!doc->encoding) return Qnil;
  return NOKOGIRI_STR_NEW2(doc->encoding);

 * call-seq:
 *  version
 * Get the XML version for this Document
static VALUE version(VALUE self)
  xmlDocPtr doc;
  Data_Get_Struct(self, xmlDoc, doc);

  if(!doc->version) return Qnil;
  return NOKOGIRI_STR_NEW2(doc->version);

/*
 * call-seq:
 *  read_io(io, url, encoding, options)
 *
 * Create a new document from an IO object
 */
/*
 * Parses a document from the Ruby object io and returns it wrapped as an
 * instance of klass, with the errors collected during parsing stored in the
 * @errors instance variable. Raises when the parser returns no document.
 *
 * NOTE(review): in this view the xmlReadIO call is cut off after its first
 * argument, and the braces and conditionals of the error branch are missing.
 * Confirm against the complete file.
 */
static VALUE read_io( VALUE klass,
                      VALUE io,
                      VALUE url,
                      VALUE encoding,
                      VALUE options )
  /* nil url/encoding are turned into NULL C strings. */
  const char * c_url    = NIL_P(url)      ? NULL : StringValuePtr(url);
  const char * c_enc    = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
  VALUE error_list      = rb_ary_new();
  VALUE document;
  xmlDocPtr doc;

  /* Install Nokogiri_error_array_pusher as the structured error handler,
   * with the Ruby array as its context, for the duration of the parse. */
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);

  doc = xmlReadIO(
      (void *)io,
  /* Remove the handler again once parsing is done. */
  xmlSetStructuredErrorFunc(NULL, NULL);

  if(doc == NULL) {
    xmlErrorPtr error;


    error = xmlGetLastError();
      /* Presumably: raise the wrapped syntax error when libxml2 reported
       * one, otherwise the generic RuntimeError -- the if/else choosing
       * between the two is not visible here. */
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
      rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil;

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;

/*
 * call-seq:
 *  read_memory(string, url, encoding, options)
 *
 * Create a new document from a String
 */
/*
 * Parses a document from the bytes of the Ruby String string and returns it
 * wrapped as an instance of klass, with the errors collected during parsing
 * stored in the @errors instance variable. Raises when the parser returns
 * no document.
 *
 * NOTE(review): the braces and conditionals of the error branch are missing
 * from this view. Confirm against the complete file.
 */
static VALUE read_memory( VALUE klass,
                          VALUE string,
                          VALUE url,
                          VALUE encoding,
                          VALUE options )
  const char * c_buffer = StringValuePtr(string);
  /* nil url/encoding are turned into NULL C strings. */
  const char * c_url    = NIL_P(url)      ? NULL : StringValuePtr(url);
  const char * c_enc    = NIL_P(encoding) ? NULL : StringValuePtr(encoding);
  /* The string length is narrowed to int because xmlReadMemory is called
   * with an int size below. */
  int len               = (int)RSTRING_LEN(string);
  VALUE error_list      = rb_ary_new();
  VALUE document;
  xmlDocPtr doc;

  /* Collect parse errors into error_list while the parser runs, then
   * remove the handler again. */
  xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
  doc = xmlReadMemory(c_buffer, len, c_url, c_enc, (int)NUM2INT(options));
  xmlSetStructuredErrorFunc(NULL, NULL);

  if(doc == NULL) {
    xmlErrorPtr error;


    error = xmlGetLastError();
      /* Presumably: raise the wrapped syntax error when libxml2 reported
       * one, otherwise the generic RuntimeError -- the if/else choosing
       * between the two is not visible here. */
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
      rb_raise(rb_eRuntimeError, "Could not parse document");

    return Qnil;

  document = Nokogiri_wrap_xml_document(klass, doc);
  rb_iv_set(document, "@errors", error_list);
  return document;

 * call-seq:
 *  dup
 * Copy this Document.  An optional depth may be passed in, but it defaults
 * to a deep copy.  0 is a shallow copy, 1 is a deep copy.
static VALUE duplicate_node(int argc, VALUE *argv, VALUE self)
  xmlDocPtr doc, dup;
  VALUE level;

  if(rb_scan_args(argc, argv, "01", &level) == 0)
    level = INT2NUM((long)1);

  Data_Get_Struct(self, xmlDoc, doc);

  dup = xmlCopyDoc(doc, (int)NUM2INT(level));
  if(dup == NULL) return Qnil;

  dup->type = doc->type;
  return Nokogiri_wrap_xml_document(rb_obj_class(self), dup);

 * call-seq:
 *  new(version = default)
 * Create a new document with +version+ (defaults to "1.0")
static VALUE new(int argc, VALUE *argv, VALUE klass)
  xmlDocPtr doc;
  VALUE version, rest, rb_doc ;

  rb_scan_args(argc, argv, "0*", &rest);
  version = rb_ary_entry(rest, (long)0);
  if (NIL_P(version)) version = rb_str_new2("1.0");

  doc = xmlNewDoc((xmlChar *)StringValuePtr(version));
  rb_doc = Nokogiri_wrap_xml_document(klass, doc);
  rb_obj_call_init(rb_doc, argc, argv);
  return rb_doc ;

/*
 *  call-seq:
 *    remove_namespaces!
 *
 *  Remove all namespaces from all nodes in the document.
 *
 *  This could be useful for developers who either don't understand namespaces
 *  or don't care about them.
 *
 *  The following example shows a use case, and you can decide for yourself
 *  whether this is a good thing or not:
 *
 *    doc = Nokogiri::XML <<-EOXML
 *       <root>
 *         <car xmlns:part="http://general-motors.com/">
 *           <part:tire>Michelin Model XGV</part:tire>
 *         </car>
 *         <bicycle xmlns:part="http://schwinn.com/">
 *           <part:tire>I'm a bicycle tire!</part:tire>
 *         </bicycle>
 *       </root>
 *       EOXML
 *
 *    doc.xpath("//tire").to_s # => ""
 *    doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => "<part:tire>Michelin Model XGV</part:tire>"
 *    doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => "<part:tire>I'm a bicycle tire!</part:tire>"
 *
 *    doc.remove_namespaces!
 *
 *    doc.xpath("//tire").to_s # => "<tire>Michelin Model XGV</tire><tire>I'm a bicycle tire!</tire>"
 *    doc.xpath("//part:tire", "part" => "http://general-motors.com/").to_s # => ""
 *    doc.xpath("//part:tire", "part" => "http://schwinn.com/").to_s # => ""
 *
 *  For more information on why this probably is *not* a good thing in general,
 *  please direct your browser to
 *
 *  http://tenderlovemaking.com/2009/04/23/namespaces-in-xml/
 */
/*
 * Implementation of Document#remove_namespaces! (registered under that name
 * in init_xml_document). Returns self.
 *
 * NOTE(review): the visible lines only fetch the xmlDoc pointer; the call
 * that actually strips the namespaces (presumably
 * recursively_remove_namespaces_from_node on the document) and the function
 * braces are not present in this view -- confirm against the complete file.
 */
VALUE remove_namespaces_bang(VALUE self)
  xmlDocPtr doc ;
  Data_Get_Struct(self, xmlDoc, doc);

  return self;

/* call-seq: doc.create_entity(name, type, external_id, system_id, content)
 *
 * Create a new entity named +name+.
 *
 * +type+ is an integer representing the type of entity to be created, and it
 * defaults to Nokogiri::XML::EntityDecl::INTERNAL_GENERAL.  See
 * the constants on Nokogiri::XML::EntityDecl for more information.
 *
 * +external_id+, +system_id+, and +content+ set the External ID, System ID,
 * and content respectively.  All of these parameters are optional.
 */
/*
 * Adds an entity declaration to the document through xmlAddDocEntity and
 * returns it wrapped as a cNokogiriXmlEntityDecl. Raises when libxml2
 * returns NULL.
 *
 * NOTE(review): in this view the rb_scan_args call is cut off before its
 * last argument, the xmlAddDocEntity call lacks its leading argument and
 * closing parenthesis, and the braces/conditionals of the error branch are
 * missing. Confirm against the complete file.
 */
static VALUE create_entity(int argc, VALUE *argv, VALUE self)
  VALUE name;
  VALUE type;
  VALUE external_id;
  VALUE system_id;
  VALUE content;
  xmlEntityPtr ptr;
  xmlDocPtr doc ;

  Data_Get_Struct(self, xmlDoc, doc);

  /* "14": one required argument followed by up to four optional ones. */
  rb_scan_args(argc, argv, "14", &name, &type, &external_id, &system_id,

  /* Each nil argument is mapped to NULL, except type, which falls back to
   * XML_INTERNAL_GENERAL_ENTITY. */
  ptr = xmlAddDocEntity(
      (xmlChar *)(NIL_P(name)        ? NULL                        : StringValuePtr(name)),
      (int)      (NIL_P(type)        ? XML_INTERNAL_GENERAL_ENTITY : NUM2INT(type)),
      (xmlChar *)(NIL_P(external_id) ? NULL                        : StringValuePtr(external_id)),
      (xmlChar *)(NIL_P(system_id)   ? NULL                        : StringValuePtr(system_id)),
      (xmlChar *)(NIL_P(content)     ? NULL                        : StringValuePtr(content))

  if(NULL == ptr) {
    xmlErrorPtr error = xmlGetLastError();
      /* Presumably: raise the wrapped syntax error when libxml2 reported
       * one, otherwise the generic RuntimeError -- the if/else choosing
       * between the two is not visible here. */
      rb_exc_raise(Nokogiri_wrap_xml_syntax_error((VALUE)NULL, error));
      rb_raise(rb_eRuntimeError, "Could not create entity");

    return Qnil;

  return Nokogiri_wrap_xml_node(cNokogiriXmlEntityDecl, (xmlNodePtr)ptr);

/*
 * Visibility callback handed to the canonicalizer (canonicalize assigns it
 * to an xmlC14NIsVisibleCallback). ctx carries the Ruby block as a VALUE.
 * The block is called with the wrapped node and its wrapped parent (or nil);
 * the callback returns 0 when the block's result is false or nil and 1
 * otherwise.
 *
 * NOTE(review): the two consecutive assignments to node below look like the
 * two arms of an if/else whose closing brace and else line are missing from
 * this view -- confirm against the complete file.
 */
static int block_caller(void * ctx, xmlNodePtr _node, xmlNodePtr _parent)
  VALUE block;
  VALUE node;
  VALUE parent;
  VALUE ret;

  /* Namespace declarations are wrapped as namespaces rather than nodes.
   * NOTE(review): _parent is dereferenced here without a NULL check, while
   * the parent assignment further down does allow for a NULL _parent. */
  if(_node->type == XML_NAMESPACE_DECL){
    node = Nokogiri_wrap_xml_namespace(_parent->doc, (xmlNsPtr) _node);
    node   = Nokogiri_wrap_xml_node(Qnil, _node);
  parent = _parent ? Nokogiri_wrap_xml_node(Qnil, _parent) : Qnil;
  block  = (VALUE)ctx;

  ret = rb_funcall(block, rb_intern("call"), 2, node, parent);

  /* Ruby falsiness: false and nil both mean "not visible". */
  if(Qfalse == ret || Qnil == ret) return 0;

  return 1;

/* call-seq:
 *  doc.canonicalize(mode=XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
 *  doc.canonicalize { |obj, parent| ... }
 *
 * Canonicalize a document and return the results.  Takes an optional block
 * that takes two parameters: the +obj+ and that node's +parent+.
 * The +obj+ will be either a Nokogiri::XML::Node or a Nokogiri::XML::Namespace.
 * The block must return a non-nil, non-false value if the +obj+ passed in
 * should be included in the canonicalized document.
 */
/*
 * Canonicalizes the document into a freshly created StringIO and returns
 * that StringIO's string. Accepts up to three optional arguments (mode,
 * inclusive namespace list, with_comments) and an optional block that is
 * used as the node-visibility filter.
 *
 * NOTE(review): in this view the xmlC14NExecute call is cut off, the
 * conditional around the namespace-list handling is missing, and no line
 * closing buf or releasing ns is visible. Confirm against the complete file.
 */
static VALUE canonicalize(int argc, VALUE* argv, VALUE self)
  VALUE mode;
  VALUE incl_ns;
  VALUE with_comments;
  xmlChar **ns;
  long ns_len, i;

  xmlDocPtr doc;
  xmlOutputBufferPtr buf;
  xmlC14NIsVisibleCallback cb = NULL;
  void * ctx = NULL;

  VALUE rb_cStringIO;
  VALUE io;

  /* "03": no required arguments, up to three optional ones. */
  rb_scan_args(argc, argv, "03", &mode, &incl_ns, &with_comments);

  Data_Get_Struct(self, xmlDoc, doc);

  /* Output goes to a new StringIO through an output buffer whose write and
   * close callbacks are pointed at it.
   * NOTE(review): buf is dereferenced without checking for NULL. */
  rb_cStringIO = rb_const_get_at(rb_cObject, rb_intern("StringIO"));
  io           = rb_class_new_instance(0, 0, rb_cStringIO);
  buf          = xmlAllocOutputBuffer(NULL);

  buf->writecallback = (xmlOutputWriteCallback)io_write_callback;
  buf->closecallback = (xmlOutputCloseCallback)io_close_callback;
  buf->context       = (void *)io;

  /* With a block, block_caller filters nodes and receives the block (as a
   * Proc) through ctx; without one cb and ctx stay NULL. */
  if(rb_block_given_p()) {
    cb = block_caller;
    ctx = (void *)rb_block_proc();

    /* Presumably ns stays NULL when incl_ns is nil and is built from the
     * array otherwise -- the if/else separating the two is not visible. */
    ns = NULL;
    ns_len = RARRAY_LEN(incl_ns);
    /* One extra zeroed slot leaves the array NULL-terminated. The entries
     * point at the Ruby strings' own buffers; they are not copied. */
    ns = calloc((size_t)ns_len+1, sizeof(xmlChar *));
    for (i = 0 ; i < ns_len ; i++) {
      VALUE entry = rb_ary_entry(incl_ns, i);
      const char * ptr = StringValuePtr(entry);
      ns[i] = (xmlChar*) ptr;

  /* NOTE(review): with_comments is tested with NIL_P only, so an explicit
   * false still selects 1 here. */
  xmlC14NExecute(doc, cb, ctx, 
    (int)      (NIL_P(mode)        ? 0 : NUM2INT(mode)), 
    (int)      (NIL_P(with_comments)        ? 0 : 1),


  return rb_funcall(io, rb_intern("string"), 0);

/* Ruby class object for Nokogiri::XML::Document; assigned in
 * init_xml_document. */
VALUE cNokogiriXmlDocument ;
/*
 * Defines Nokogiri::XML::Document as a subclass of Nokogiri::XML::Node,
 * stores the class in cNokogiriXmlDocument, and registers the singleton and
 * instance methods implemented in this file.
 *
 * NOTE(review): the function braces and the delimiters of the inline
 * comment above the class definition are missing from this view.
 */
void init_xml_document()
  VALUE nokogiri  = rb_define_module("Nokogiri");
  VALUE xml       = rb_define_module_under(nokogiri, "XML");
  VALUE node      = rb_define_class_under(xml, "Node", rb_cObject);

   * Nokogiri::XML::Document wraps an xml document.
  VALUE klass = rb_define_class_under(xml, "Document", node);

  cNokogiriXmlDocument = klass;

  /* Class-level constructors. An arity of -1 means the C function takes
   * (argc, argv, self). */
  rb_define_singleton_method(klass, "read_memory", read_memory, 4);
  rb_define_singleton_method(klass, "read_io", read_io, 4);
  rb_define_singleton_method(klass, "new", new, -1);

  /* Instance methods. */
  rb_define_method(klass, "root", root, 0);
  rb_define_method(klass, "root=", set_root, 1);
  rb_define_method(klass, "encoding", encoding, 0);
  rb_define_method(klass, "encoding=", set_encoding, 1);
  rb_define_method(klass, "version", version, 0);
  rb_define_method(klass, "canonicalize", canonicalize, -1);
  rb_define_method(klass, "dup", duplicate_node, -1);
  rb_define_method(klass, "url", url, 0);
  rb_define_method(klass, "create_entity", create_entity, -1);
  rb_define_method(klass, "remove_namespaces!", remove_namespaces_bang, 0);

/*
 * Wraps doc in a Ruby object and returns it. The class is klass, or
 * cNokogiriXmlDocument when klass is 0; klass is a parameter because this
 * function is used for HtmlDocument, too. A nokogiriTuple holding the Ruby
 * object, a table for unlinked nodes and the node cache array is stored in
 * doc->_private, and the new object's initialize is called with no
 * arguments.
 *
 * NOTE(review): the Data_Wrap_Struct call is cut off after its first
 * argument and the function braces are missing from this view. The malloc
 * result is used without a NULL check.
 */
VALUE Nokogiri_wrap_xml_document(VALUE klass, xmlDocPtr doc)
  nokogiriTuplePtr tuple = (nokogiriTuplePtr)malloc(sizeof(nokogiriTuple));

  VALUE rb_doc = Data_Wrap_Struct(
      klass ? klass : cNokogiriXmlDocument,

  /* The same array is stored in the @node_cache instance variable and in
   * the tuple's node_cache field. */
  VALUE cache = rb_ary_new();
  rb_iv_set(rb_doc, "@decorators", Qnil);
  rb_iv_set(rb_doc, "@node_cache", cache);

  tuple->doc = rb_doc;
  /* 128 is the initial size requested for the unlinked-node table. */
  tuple->unlinkedNodes = st_init_numtable_with_size(128);
  tuple->node_cache = cache;
  doc->_private = tuple ;

  rb_obj_call_init(rb_doc, 0, NULL);

  return rb_doc ;