#include VALUE cNokogiriXmlReader; static void dealloc(xmlTextReaderPtr reader) { NOKOGIRI_DEBUG_START(reader); xmlFreeTextReader(reader); NOKOGIRI_DEBUG_END(reader); } static int has_attributes(xmlTextReaderPtr reader) { /* * this implementation of xmlTextReaderHasAttributes explicitly includes * namespaces and properties, because some earlier versions ignore * namespaces. */ xmlNodePtr node ; node = xmlTextReaderCurrentNode(reader); if (node == NULL) { return (0); } if ((node->type == XML_ELEMENT_NODE) && ((node->properties != NULL) || (node->nsDef != NULL))) { return (1); } return (0); } static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash) { xmlNsPtr ns; VALUE key; if (node->type != XML_ELEMENT_NODE) { return ; } ns = node->nsDef; while (ns != NULL) { key = rb_enc_str_new_cstr(XMLNS_PREFIX, rb_utf8_encoding()); if (ns->prefix) { rb_str_cat_cstr(key, ":"); rb_str_cat_cstr(key, (const char *)ns->prefix); } key = rb_str_conv_enc(key, rb_utf8_encoding(), rb_default_internal_encoding()); rb_hash_aset(attr_hash, key, (ns->href ? NOKOGIRI_STR_NEW2(ns->href) : Qnil) ); ns = ns->next ; } } /* * call-seq: * default? * * Was an attribute generated from the default value in the DTD or schema? */ static VALUE default_eh(VALUE self) { xmlTextReaderPtr reader; int eh; Data_Get_Struct(self, xmlTextReader, reader); eh = xmlTextReaderIsDefault(reader); if (eh == 0) { return Qfalse; } if (eh == 1) { return Qtrue; } return Qnil; } /* * call-seq: * value? * * Does this node have a text value? */ static VALUE value_eh(VALUE self) { xmlTextReaderPtr reader; int eh; Data_Get_Struct(self, xmlTextReader, reader); eh = xmlTextReaderHasValue(reader); if (eh == 0) { return Qfalse; } if (eh == 1) { return Qtrue; } return Qnil; } /* * call-seq: * attributes? * * Does this node have attributes? */ static VALUE attributes_eh(VALUE self) { xmlTextReaderPtr reader; int eh; Data_Get_Struct(self, xmlTextReader, reader); eh = has_attributes(reader); if (eh == 0) { return Qfalse; } if (eh == 1) { return Qtrue; } return Qnil; } /* * call-seq: * namespaces * * Get a hash of namespaces for this Node */ static VALUE namespaces(VALUE self) { xmlTextReaderPtr reader; xmlNodePtr ptr; VALUE attr ; Data_Get_Struct(self, xmlTextReader, reader); attr = rb_hash_new() ; if (! has_attributes(reader)) { return attr ; } ptr = xmlTextReaderExpand(reader); if (ptr == NULL) { return Qnil; } Nokogiri_xml_node_namespaces(ptr, attr); return attr ; } /* * @overload attribute_nodes() * Get the attributes of the current node as an Array of Attr * @return [Array] */ static VALUE rb_xml_reader_attribute_nodes(VALUE rb_reader) { xmlTextReaderPtr c_reader; xmlNodePtr c_node; VALUE attr_nodes; int j; Data_Get_Struct(rb_reader, xmlTextReader, c_reader); if (! has_attributes(c_reader)) { return rb_ary_new() ; } c_node = xmlTextReaderExpand(c_reader); if (c_node == NULL) { return Qnil; } attr_nodes = noko_xml_node_attrs(c_node); /* ensure that the Reader won't be GCed as long as a node is referenced */ for (j = 0 ; j < RARRAY_LEN(attr_nodes) ; j++) { rb_iv_set(rb_ary_entry(attr_nodes, j), "@reader", rb_reader); } return attr_nodes; } /* * call-seq: * attribute_at(index) * * Get the value of attribute at +index+ */ static VALUE attribute_at(VALUE self, VALUE index) { xmlTextReaderPtr reader; xmlChar *value; VALUE rb_value; Data_Get_Struct(self, xmlTextReader, reader); if (NIL_P(index)) { return Qnil; } index = rb_Integer(index); value = xmlTextReaderGetAttributeNo( reader, (int)NUM2INT(index) ); if (value == NULL) { return Qnil; } rb_value = NOKOGIRI_STR_NEW2(value); xmlFree(value); return rb_value; } /* * call-seq: * attribute(name) * * Get the value of attribute named +name+ */ static VALUE reader_attribute(VALUE self, VALUE name) { xmlTextReaderPtr reader; xmlChar *value ; VALUE rb_value; Data_Get_Struct(self, xmlTextReader, reader); if (NIL_P(name)) { return Qnil; } name = StringValue(name) ; value = xmlTextReaderGetAttribute(reader, (xmlChar *)StringValueCStr(name)); if (value == NULL) { return Qnil; } rb_value = NOKOGIRI_STR_NEW2(value); xmlFree(value); return rb_value; } /* * call-seq: * attribute_count * * Get the number of attributes for the current node */ static VALUE attribute_count(VALUE self) { xmlTextReaderPtr reader; int count; Data_Get_Struct(self, xmlTextReader, reader); count = xmlTextReaderAttributeCount(reader); if (count == -1) { return Qnil; } return INT2NUM((long)count); } /* * call-seq: * depth * * Get the depth of the node */ static VALUE depth(VALUE self) { xmlTextReaderPtr reader; int depth; Data_Get_Struct(self, xmlTextReader, reader); depth = xmlTextReaderDepth(reader); if (depth == -1) { return Qnil; } return INT2NUM((long)depth); } /* * call-seq: * xml_version * * Get the XML version of the document being read */ static VALUE xml_version(VALUE self) { xmlTextReaderPtr reader; const char *version; Data_Get_Struct(self, xmlTextReader, reader); version = (const char *)xmlTextReaderConstXmlVersion(reader); if (version == NULL) { return Qnil; } return NOKOGIRI_STR_NEW2(version); } /* * call-seq: * lang * * Get the xml:lang scope within which the node resides. */ static VALUE lang(VALUE self) { xmlTextReaderPtr reader; const char *lang; Data_Get_Struct(self, xmlTextReader, reader); lang = (const char *)xmlTextReaderConstXmlLang(reader); if (lang == NULL) { return Qnil; } return NOKOGIRI_STR_NEW2(lang); } /* * call-seq: * value * * Get the text value of the node if present. Returns a utf-8 encoded string. */ static VALUE value(VALUE self) { xmlTextReaderPtr reader; const char *value; Data_Get_Struct(self, xmlTextReader, reader); value = (const char *)xmlTextReaderConstValue(reader); if (value == NULL) { return Qnil; } return NOKOGIRI_STR_NEW2(value); } /* * call-seq: * prefix * * Get the shorthand reference to the namespace associated with the node. */ static VALUE prefix(VALUE self) { xmlTextReaderPtr reader; const char *prefix; Data_Get_Struct(self, xmlTextReader, reader); prefix = (const char *)xmlTextReaderConstPrefix(reader); if (prefix == NULL) { return Qnil; } return NOKOGIRI_STR_NEW2(prefix); } /* * call-seq: * namespace_uri * * Get the URI defining the namespace associated with the node */ static VALUE namespace_uri(VALUE self) { xmlTextReaderPtr reader; const char *uri; Data_Get_Struct(self, xmlTextReader, reader); uri = (const char *)xmlTextReaderConstNamespaceUri(reader); if (uri == NULL) { return Qnil; } return NOKOGIRI_STR_NEW2(uri); } /* * call-seq: * local_name * * Get the local name of the node */ static VALUE local_name(VALUE self) { xmlTextReaderPtr reader; const char *name; Data_Get_Struct(self, xmlTextReader, reader); name = (const char *)xmlTextReaderConstLocalName(reader); if (name == NULL) { return Qnil; } return NOKOGIRI_STR_NEW2(name); } /* * call-seq: * name * * Get the name of the node. Returns a utf-8 encoded string. */ static VALUE name(VALUE self) { xmlTextReaderPtr reader; const char *name; Data_Get_Struct(self, xmlTextReader, reader); name = (const char *)xmlTextReaderConstName(reader); if (name == NULL) { return Qnil; } return NOKOGIRI_STR_NEW2(name); } /* * call-seq: * base_uri * * Get the xml:base of the node */ static VALUE base_uri(VALUE self) { xmlTextReaderPtr reader; const char *base_uri; Data_Get_Struct(self, xmlTextReader, reader); base_uri = (const char *)xmlTextReaderBaseUri(reader); if (base_uri == NULL) { return Qnil; } return NOKOGIRI_STR_NEW2(base_uri); } /* * call-seq: * state * * Get the state of the reader */ static VALUE state(VALUE self) { xmlTextReaderPtr reader; Data_Get_Struct(self, xmlTextReader, reader); return INT2NUM((long)xmlTextReaderReadState(reader)); } /* * call-seq: * node_type * * Get the type of readers current node */ static VALUE node_type(VALUE self) { xmlTextReaderPtr reader; Data_Get_Struct(self, xmlTextReader, reader); return INT2NUM((long)xmlTextReaderNodeType(reader)); } /* * call-seq: * read * * Move the Reader forward through the XML document. */ static VALUE read_more(VALUE self) { xmlTextReaderPtr reader; xmlErrorPtr error; VALUE error_list; int ret; Data_Get_Struct(self, xmlTextReader, reader); error_list = rb_funcall(self, rb_intern("errors"), 0); xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher); ret = xmlTextReaderRead(reader); xmlSetStructuredErrorFunc(NULL, NULL); if (ret == 1) { return self; } if (ret == 0) { return Qnil; } error = xmlGetLastError(); if (error) { rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error)); } else { rb_raise(rb_eRuntimeError, "Error pulling: %d", ret); } return Qnil; } /* * call-seq: * inner_xml * * Read the contents of the current node, including child nodes and markup. * Returns a utf-8 encoded string. */ static VALUE inner_xml(VALUE self) { xmlTextReaderPtr reader; xmlChar *value; VALUE str; Data_Get_Struct(self, xmlTextReader, reader); value = xmlTextReaderReadInnerXml(reader); str = Qnil; if (value) { str = NOKOGIRI_STR_NEW2((char *)value); xmlFree(value); } return str; } /* * call-seq: * outer_xml * * Read the current node and its contents, including child nodes and markup. * Returns a utf-8 encoded string. */ static VALUE outer_xml(VALUE self) { xmlTextReaderPtr reader; xmlChar *value; VALUE str = Qnil; Data_Get_Struct(self, xmlTextReader, reader); value = xmlTextReaderReadOuterXml(reader); if (value) { str = NOKOGIRI_STR_NEW2((char *)value); xmlFree(value); } return str; } /* * call-seq: * from_memory(string, url = nil, encoding = nil, options = 0) * * Create a new reader that parses +string+ */ static VALUE from_memory(int argc, VALUE *argv, VALUE klass) { VALUE rb_buffer, rb_url, encoding, rb_options; xmlTextReaderPtr reader; const char *c_url = NULL; const char *c_encoding = NULL; int c_options = 0; VALUE rb_reader, args[3]; rb_scan_args(argc, argv, "13", &rb_buffer, &rb_url, &encoding, &rb_options); if (!RTEST(rb_buffer)) { rb_raise(rb_eArgError, "string cannot be nil"); } if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); } if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); } if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); } reader = xmlReaderForMemory( StringValuePtr(rb_buffer), (int)RSTRING_LEN(rb_buffer), c_url, c_encoding, c_options ); if (reader == NULL) { xmlFreeTextReader(reader); rb_raise(rb_eRuntimeError, "couldn't create a parser"); } rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader); args[0] = rb_buffer; args[1] = rb_url; args[2] = encoding; rb_obj_call_init(rb_reader, 3, args); return rb_reader; } /* * call-seq: * from_io(io, url = nil, encoding = nil, options = 0) * * Create a new reader that parses +io+ */ static VALUE from_io(int argc, VALUE *argv, VALUE klass) { VALUE rb_io, rb_url, encoding, rb_options; xmlTextReaderPtr reader; const char *c_url = NULL; const char *c_encoding = NULL; int c_options = 0; VALUE rb_reader, args[3]; rb_scan_args(argc, argv, "13", &rb_io, &rb_url, &encoding, &rb_options); if (!RTEST(rb_io)) { rb_raise(rb_eArgError, "io cannot be nil"); } if (RTEST(rb_url)) { c_url = StringValueCStr(rb_url); } if (RTEST(encoding)) { c_encoding = StringValueCStr(encoding); } if (RTEST(rb_options)) { c_options = (int)NUM2INT(rb_options); } reader = xmlReaderForIO( (xmlInputReadCallback)noko_io_read, (xmlInputCloseCallback)noko_io_close, (void *)rb_io, c_url, c_encoding, c_options ); if (reader == NULL) { xmlFreeTextReader(reader); rb_raise(rb_eRuntimeError, "couldn't create a parser"); } rb_reader = Data_Wrap_Struct(klass, NULL, dealloc, reader); args[0] = rb_io; args[1] = rb_url; args[2] = encoding; rb_obj_call_init(rb_reader, 3, args); return rb_reader; } /* * call-seq: * reader.empty_element? # => true or false * * Returns true if the current node is empty, otherwise false. */ static VALUE empty_element_p(VALUE self) { xmlTextReaderPtr reader; Data_Get_Struct(self, xmlTextReader, reader); if (xmlTextReaderIsEmptyElement(reader)) { return Qtrue; } return Qfalse; } void noko_init_xml_reader() { /* * The Reader parser allows you to effectively pull parse an XML document. * Once instantiated, call Nokogiri::XML::Reader#each to iterate over each * node. Note that you may only iterate over the document once! */ cNokogiriXmlReader = rb_define_class_under(mNokogiriXml, "Reader", rb_cObject); rb_undef_alloc_func(cNokogiriXmlReader); rb_define_singleton_method(cNokogiriXmlReader, "from_memory", from_memory, -1); rb_define_singleton_method(cNokogiriXmlReader, "from_io", from_io, -1); rb_define_method(cNokogiriXmlReader, "attribute", reader_attribute, 1); rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1); rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0); rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0); rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0); rb_define_method(cNokogiriXmlReader, "base_uri", base_uri, 0); rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0); rb_define_method(cNokogiriXmlReader, "depth", depth, 0); rb_define_method(cNokogiriXmlReader, "empty_element?", empty_element_p, 0); rb_define_method(cNokogiriXmlReader, "inner_xml", inner_xml, 0); rb_define_method(cNokogiriXmlReader, "lang", lang, 0); rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0); rb_define_method(cNokogiriXmlReader, "name", name, 0); rb_define_method(cNokogiriXmlReader, "namespace_uri", namespace_uri, 0); rb_define_method(cNokogiriXmlReader, "namespaces", namespaces, 0); rb_define_method(cNokogiriXmlReader, "node_type", node_type, 0); rb_define_method(cNokogiriXmlReader, "outer_xml", outer_xml, 0); rb_define_method(cNokogiriXmlReader, "prefix", prefix, 0); rb_define_method(cNokogiriXmlReader, "read", read_more, 0); rb_define_method(cNokogiriXmlReader, "state", state, 0); rb_define_method(cNokogiriXmlReader, "value", value, 0); rb_define_method(cNokogiriXmlReader, "value?", value_eh, 0); rb_define_method(cNokogiriXmlReader, "xml_version", xml_version, 0); }