require 'stringio' require 'nokogiri/xml/node/save_options' module Nokogiri module XML #### # Nokogiri::XML::Node is your window to the fun filled world of dealing # with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly # to a hash with regard to attributes. For example (from irb): # # irb(main):004:0> node # => link # irb(main):005:0> node['href'] # => "#foo" # irb(main):006:0> node.keys # => ["href", "id"] # irb(main):007:0> node.values # => ["#foo", "link"] # irb(main):008:0> node['class'] = 'green' # => "green" # irb(main):009:0> node # => link # irb(main):010:0> # # See Nokogiri::XML::Node#[] and Nokogiri::XML#[]= for more information. # # Nokogiri::XML::Node also has methods that let you move around your # tree. For navigating your tree, see: # # * Nokogiri::XML::Node#parent # * Nokogiri::XML::Node#children # * Nokogiri::XML::Node#next # * Nokogiri::XML::Node#previous # # You may search this node's subtree using Node#xpath and Node#css class Node ELEMENT_NODE = 1 ATTRIBUTE_NODE = 2 TEXT_NODE = 3 CDATA_SECTION_NODE = 4 ENTITY_REF_NODE = 5 ENTITY_NODE = 6 PI_NODE = 7 COMMENT_NODE = 8 DOCUMENT_NODE = 9 DOCUMENT_TYPE_NODE = 10 DOCUMENT_FRAG_NODE = 11 NOTATION_NODE = 12 HTML_DOCUMENT_NODE = 13 DTD_NODE = 14 ELEMENT_DECL = 15 ATTRIBUTE_DECL = 16 ENTITY_DECL = 17 NAMESPACE_DECL = 18 XINCLUDE_START = 19 XINCLUDE_END = 20 DOCB_DOCUMENT_NODE = 21 # The Document associated with this Node. attr_accessor :document ### # Decorate this node with the decorators set up in this node's Document def decorate! document.decorate(self) if document end ### # Search this node for +paths+. +paths+ can be XPath or CSS, and an # optional hash of namespaces may be appended. # See Node#xpath and Node#css. def search *paths ns = paths.last.is_a?(Hash) ? paths.pop : (document.root ? document.root.namespaces : {}) xpath(*(paths.map { |path| path = path.to_s path =~ /^(\.\/|\/)/ ? path : CSS.xpath_for( path, :prefix => ".//", :ns => ns ) }.flatten.uniq) + [ns]) end alias :/ :search ### # Search this node for XPath +paths+. +paths+ must be one or more XPath # queries. A hash of namespaces may be appended. For example: # # node.xpath('.//title') # node.xpath('.//foo:name', { 'foo' => 'http://example.org/' }) # node.xpath('.//xmlns:name', node.root.namespaces) # # Custom XPath functions may also be defined. To define custom functions # create a class and implement the # function you want to define. # For example: # # node.xpath('.//title[regex(., "\w+")]', Class.new { # def regex node_set, regex # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ } # end # }.new) # def xpath *paths # Pop off our custom function handler if it exists handler = ![ Hash, String, Symbol ].include?(paths.last.class) ? paths.pop : nil ns = paths.last.is_a?(Hash) ? paths.pop : (document.root ? document.root.namespaces : {}) return NodeSet.new(document) unless document sets = paths.map { |path| ctx = XPathContext.new(self) ctx.register_namespaces(ns) set = ctx.evaluate(path, handler).node_set set.document = document document.decorate(set) set } return sets.first if sets.length == 1 NodeSet.new(document) do |combined| document.decorate(combined) sets.each do |set| set.each do |node| combined << node end end end end ### # Search this node for CSS +rules+. +rules+ must be one or more CSS # selectors. For example: # # node.css('title') # node.css('body h1.bold') # node.css('div + p.green', 'div#one') # # Custom CSS pseudo classes may also be defined. To define custom pseudo # classes, create a class and implement the custom pseudo class you # want defined. The first argument to the method will be the current # matching NodeSet. Any other arguments are ones that you pass in. # For example: # # node.css('title:regex("\w+")', Class.new { # def regex node_set, regex # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ } # end # }) # def css *rules # Pop off our custom function handler if it exists handler = ![ Hash, String, Symbol ].include?(rules.last.class) ? rules.pop : nil ns = rules.last.is_a?(Hash) ? rules.pop : (document.root ? document.root.namespaces : {}) rules = rules.map { |rule| CSS.xpath_for(rule, :prefix => ".//", :ns => ns) }.flatten.uniq + [ns, handler].compact xpath(*rules) end ### # Search for the first occurrence of +path+. # Returns nil if nothing is found, otherwise a Node. def at path, ns = document.root ? document.root.namespaces : {} search(path, ns).first end ### # Get the attribute value for the attribute +name+ def [] name return nil unless key?(name.to_s) get(name.to_s) end alias :next :next_sibling alias :previous :previous_sibling alias :remove :unlink alias :get_attribute :[] alias :set_attribute :[]= alias :text :content alias :inner_text :content alias :has_attribute? :key? alias :<< :add_child alias :name :node_name alias :name= :node_name= alias :type :node_type alias :to_str :text #### # Returns a hash containing the node's attributes. The key is the # attribute name, the value is the string value of the attribute. def attributes Hash[*(attribute_nodes.map { |node| [node.node_name, node] }.flatten)] end ### # Get the attribute values for this Node. def values attribute_nodes.map { |node| node.value } end ### # Get the attribute names for this Node. def keys attribute_nodes.map { |node| node.node_name } end ### # Iterate over each attribute name and value pair for this Node. def each &block attribute_nodes.each { |node| block.call(node.node_name, node.value) } end ### # Remove the attribute named +name+ def remove_attribute name attributes[name].remove if key? name end alias :delete :remove_attribute #### # Create nodes from +data+ and insert them before this node # (as a sibling). def before data fragment(data).children.each do |node| add_previous_sibling node end self end #### # Create nodes from +data+ and insert them after this node # (as a sibling). def after data fragment(data).children.to_a.reverse.each do |node| add_next_sibling node end self end #### # Swap this Node for new nodes made from +data+ def swap data before(data) remove self end #### # Set the inner_html for this Node to +tags+ def inner_html= tags children.each { |x| x.remove} fragment(tags).children.to_a.reverse.each do |node| add_child node end self end #### # Create a Nokogiri::XML::DocumentFragment from +tags+ def fragment tags classes = document.class.name.split('::') classes[-1] = 'SAX::Parser' fragment = DocumentFragment.new(self.document) parser = eval(classes.join('::')).new( FragmentHandler.new(fragment, tags) ) parser.parse(tags) fragment end #### # Set the content to +string+. def content= string self.native_content = encode_special_chars(string.to_s) end ### # Set the parent Node for this Node def parent= parent_node parent_node.add_child(self) parent_node end # Returns true if this is a Comment def comment? type == COMMENT_NODE end # Returns true if this is a CDATA def cdata? type == CDATA_SECTION_NODE end # Returns true if this is an XML::Document node def xml? type == DOCUMENT_NODE end # Returns true if this is an HTML::Document node def html? type == HTML_DOCUMENT_NODE end # Returns true if this is a Text node def text? type == TEXT_NODE end def read_only? # According to gdome2, these are read-only node types [NOTATION_NODE, ENTITY_NODE, ENTITY_DECL].include?(type) end # Returns true if this is an Element node def element? type == ELEMENT_NODE end alias :elem? :element? def to_s document.xml? ? to_xml : to_html end def inner_html children.map { |x| x.to_html }.join end # Get the path to this node as a CSS expression def css_path path.split(/\//).map { |part| part.length == 0 ? nil : part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)') }.compact.join(' > ') end # recursively get all namespaces from this node and its subtree def collect_namespaces # TODO: print warning message if a prefix refers to more than one URI in the document? ns = {} traverse {|j| ns.merge!(j.namespaces)} ns end ### # Get a list of ancestor Node for this Node def ancestors return [] unless respond_to?(:parent) parents = [parent] while parents.last.respond_to?(:parent) parents << parents.last.parent end parents end #### # Yields self and all children to +block+ recursively. def traverse(&block) children.each{|j| j.traverse(&block) } block.call(self) end #### # replace node with the new node in the document. def replace(new_node) if new_node.is_a?(Document) || !new_node.is_a?(XML::Node) raise ArgumentError, <<-EOERR Node.replace requires a Node argument, and cannot accept a Document. (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().) EOERR end replace_with_node new_node end ### # Test to see if this Node is equal to +other+ def == other return false unless other return false unless other.respond_to?(:pointer_id) pointer_id == other.pointer_id end ### # Serialize Node using +encoding+ and +save_options+. Save options # can also be set using a block. See SaveOptions. # # These two statements are equivalent: # # node.serialize('UTF-8', FORMAT | AS_XML) # # or # # node.serialize('UTF-8') do |config| # config.format.as_xml # end # def serialize encoding = nil, save_options = SaveOptions::FORMAT, &block io = StringIO.new write_to io, encoding, save_options, &block io.rewind io.read end ### # Serialize this Node to HTML using +encoding+ def to_html encoding = nil # FIXME: this is a hack around broken libxml versions return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1] serialize(encoding, SaveOptions::FORMAT | SaveOptions::NO_DECLARATION | SaveOptions::NO_EMPTY_TAGS | SaveOptions::AS_HTML) end ### # Serialize this Node to XML using +encoding+ def to_xml encoding = nil serialize(encoding, SaveOptions::FORMAT | SaveOptions::AS_XML) end ### # Serialize this Node to XML using +encoding+ def to_xhtml encoding = nil # FIXME: this is a hack around broken libxml versions return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1] serialize(encoding, SaveOptions::FORMAT | SaveOptions::NO_DECLARATION | SaveOptions::NO_EMPTY_TAGS | SaveOptions::AS_XHTML) end ### # Write Node to +io+ with +encoding+ and +save_options+ def write_to io, encoding = nil, save_options = SaveOptions::FORMAT config = SaveOptions.new(save_options) yield config if block_given? native_write_to(io, encoding, config.options) end ### # Write Node as HTML to +io+ with +encoding+ def write_html_to io, encoding = nil write_to io, encoding, SaveOptions::FORMAT | SaveOptions::NO_DECLARATION | SaveOptions::NO_EMPTY_TAGS | SaveOptions::AS_HTML end ### # Write Node as XHTML to +io+ with +encoding+ def write_xhtml_to io, encoding = nil write_to io, encoding, SaveOptions::FORMAT | SaveOptions::NO_DECLARATION | SaveOptions::NO_EMPTY_TAGS | SaveOptions::AS_XHTML end ### # Write Node as XML to +io+ with +encoding+ def write_xml_to io, encoding = nil write_to io, encoding, SaveOptions::FORMAT | SaveOptions::AS_XML end # Create a new node from +string+ # # THIS METHOD IS DEPRECATED # This method is deprecated and will be removed in 1.3.0 or by # March 1, 2009. Instead, use Nokogiri::XML::Node#fragment() def self.new_from_str string $stderr.puts("This method is deprecated and will be removed in 1.3.0 or by March 1, 2009. Instead, use Nokogiri::XML::Node#fragment") Nokogiri::HTML.fragment(string).first end end end end