# encoding: UTF-8
require 'stringio'
require 'nokogiri/xml/node/save_options'
module Nokogiri
module XML
####
# Nokogiri::XML::Node is your window to the fun filled world of dealing
# with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
# to a hash with regard to attributes. For example (from irb):
#
# irb(main):004:0> node
# => <a href="#foo" id="link">link</a>
# irb(main):005:0> node['href']
# => "#foo"
# irb(main):006:0> node.keys
# => ["href", "id"]
# irb(main):007:0> node.values
# => ["#foo", "link"]
# irb(main):008:0> node['class'] = 'green'
# => "green"
# irb(main):009:0> node
# => <a href="#foo" id="link" class="green">link</a>
# irb(main):010:0>
#
# See Nokogiri::XML::Node#[] and Nokogiri::XML::Node#[]= for more information.
#
# Nokogiri::XML::Node also has methods that let you move around your
# tree. For navigating your tree, see:
#
# * Nokogiri::XML::Node#parent
# * Nokogiri::XML::Node#children
# * Nokogiri::XML::Node#next
# * Nokogiri::XML::Node#previous
#
#
# When printing or otherwise emitting a document or a node (and
# its subtree), there are a few methods you might want to use:
#
# * content, text, inner_text, to_str: emit plaintext
#
# These methods will all emit the plaintext version of your
# document, meaning that entities will be replaced (e.g., "&lt;"
# will be replaced with "<"), meaning that any sanitizing will
# likely be un-done in the output.
#
# * to_s, to_xml, to_html, inner_html: emit well-formed markup
#
# These methods will all emit properly-escaped markup, meaning
# that it's suitable for consumption by browsers, parsers, etc.
#
# You may search this node's subtree using Searchable#xpath and Searchable#css
class Node
include Nokogiri::XML::PP::Node
include Nokogiri::XML::Searchable
include Enumerable
# Node type codes. The predicate methods of this class (#element?, #text?,
# #comment?, ...) compare Node#type against these values.
# NOTE(review): the numbering presumably mirrors libxml2's xmlElementType
# enumeration -- confirm before changing any value.
#
# Element node type, see Nokogiri::XML::Node#element?
ELEMENT_NODE = 1
# Attribute node type
ATTRIBUTE_NODE = 2
# Text node type, see Nokogiri::XML::Node#text?
TEXT_NODE = 3
# CDATA node type, see Nokogiri::XML::Node#cdata?
CDATA_SECTION_NODE = 4
# Entity reference node type
ENTITY_REF_NODE = 5
# Entity node type (treated as read-only, see Nokogiri::XML::Node#read_only?)
ENTITY_NODE = 6
# Processing instruction node type, see Nokogiri::XML::Node#processing_instruction?
PI_NODE = 7
# Comment node type, see Nokogiri::XML::Node#comment?
COMMENT_NODE = 8
# Document node type, see Nokogiri::XML::Node#xml?
DOCUMENT_NODE = 9
# Document type node type
DOCUMENT_TYPE_NODE = 10
# Document fragment node type, see Nokogiri::XML::Node#fragment?
DOCUMENT_FRAG_NODE = 11
# Notation node type (treated as read-only, see Nokogiri::XML::Node#read_only?)
NOTATION_NODE = 12
# HTML document node type, see Nokogiri::XML::Node#html?
HTML_DOCUMENT_NODE = 13
# DTD node type
DTD_NODE = 14
# Element declaration type
ELEMENT_DECL = 15
# Attribute declaration type
ATTRIBUTE_DECL = 16
# Entity declaration type (treated as read-only, see Nokogiri::XML::Node#read_only?)
ENTITY_DECL = 17
# Namespace declaration type
NAMESPACE_DECL = 18
# XInclude start type
XINCLUDE_START = 19
# XInclude end type
XINCLUDE_END = 20
# DOCB document node type
DOCB_DOCUMENT_NODE = 21
def initialize(name, document) # :nodoc:
  # Intentionally a no-op: both arguments are accepted and ignored here.
end
###
# Hand this node to its owning Document so that the decorators registered
# on that Document are applied to it. Returns whatever Document#decorate
# returns.
def decorate!
  owner = document
  owner.decorate(self)
end
###
# Search this node's immediate children using CSS selector +selector+.
# The selector is translated to XPath with a "./" prefix, using the
# namespaces reported by the document's root node.
def > selector
  in_scope = document.root.namespaces
  child_query = CSS.xpath_for(selector, :prefix => "./", :ns => in_scope).first
  xpath(child_query)
end
###
# Look up the value of the attribute called +name+. +name+ is converted
# with #to_s first, so symbols are accepted.
def [] name
  attribute_name = name.to_s
  get(attribute_name)
end
###
# Assign +value+ to the attribute called +name+. Both are converted with
# #to_s before being handed to the underlying setter.
def []= name, value
  attribute_name = name.to_s
  attribute_value = value.to_s
  set(attribute_name, attribute_value)
end
###
# Append +node_or_tags+ to this Node's children.
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
#
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
#
# Also see related method +<<+.
def add_child node_or_tags
  incoming = coerce(node_or_tags)
  # A NodeSet is appended member by member; anything else is one node.
  members = incoming.is_a?(XML::NodeSet) ? incoming : [incoming]
  members.each { |member| add_child_node_and_reparent_attrs(member) }
  incoming
end
###
# Insert +node_or_tags+ in front of this Node's existing children.
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
#
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
#
# Raises RuntimeError when this node is a document that already has a
# child and +node_or_tags+ is not a processing instruction.
#
# Also see related method +add_child+.
def prepend_child node_or_tags
  head = children.first
  # With no children yet, prepending is the same as appending.
  return add_child(node_or_tags) unless head

  # Mimic the error add_child would raise.
  if document? && !node_or_tags.processing_instruction?
    raise RuntimeError, "Document already has a root node"
  end
  head.__send__(:add_sibling, :previous, node_or_tags)
end
###
# Append +node_or_tags+ to this Node's children.
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
#
# Unlike +add_child+, this returns self so calls can be chained
# (e.g., root << child1 << child2).
#
# Also see related method +add_child+.
def << node_or_tags
  tap { add_child(node_or_tags) }
end
###
# Insert +node_or_tags+ immediately before this Node (as a sibling).
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
#
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
#
# Raises ArgumentError when this node's parent is a document and
# +node_or_tags+ is not a processing instruction.
#
# Also see related method +before+.
def add_previous_sibling node_or_tags
  if (parent && parent.document?) && !node_or_tags.processing_instruction?
    raise ArgumentError, "A document may not have multiple root nodes."
  end
  add_sibling(:previous, node_or_tags)
end
###
# Insert +node_or_tags+ immediately after this Node (as a sibling).
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
#
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
#
# Raises ArgumentError when this node's parent is a document and
# +node_or_tags+ is not a processing instruction.
#
# Also see related method +after+.
def add_next_sibling node_or_tags
  if (parent && parent.document?) && !node_or_tags.processing_instruction?
    raise ArgumentError, "A document may not have multiple root nodes."
  end
  add_sibling(:next, node_or_tags)
end
####
# Insert +node_or_tags+ before this node (as a sibling).
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
#
# Same insertion as +add_previous_sibling+, but returns self to support
# chaining of calls.
#
# Also see related method +add_previous_sibling+.
def before node_or_tags
  tap { add_previous_sibling(node_or_tags) }
end
####
# Insert +node_or_tags+ after this node (as a sibling).
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
#
# Same insertion as +add_next_sibling+, but returns self to support
# chaining of calls.
#
# Also see related method +add_next_sibling+.
def after node_or_tags
  tap { add_next_sibling(node_or_tags) }
end
####
# Replace this Node's children with +node_or_tags+.
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
#
# Delegates to +children=+ and returns self.
#
# Also see related method +children=+
def inner_html= node_or_tags
  tap { self.children = node_or_tags }
end
####
# Replace this Node's children with +node_or_tags+.
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a string containing markup.
#
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
#
# Also see related method +inner_html=+
def children= node_or_tags
  # Coerce first so an unacceptable argument raises before anything is unlinked.
  replacement = coerce(node_or_tags)
  children.unlink
  members = replacement.is_a?(XML::NodeSet) ? replacement : [replacement]
  members.each { |member| add_child_node_and_reparent_attrs(member) }
  replacement
end
####
# Put +node_or_tags+ in this Node's place and unlink this Node.
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
#
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is a DocumentFragment, NodeSet, or string).
#
# Also see related method +swap+.
def replace node_or_tags
  # We cannot replace a text node directly, otherwise libxml will return
  # an internal error at parser.c:13031, I don't know exactly why
  # libxml is trying to find a parent node that is an element or document
  # so I can't tell if this is bug in libxml or not. issue #775.
  # Workaround: drop a throwaway element in front of the text node, unlink
  # the text node, and let the throwaway element perform the replacement.
  if text?
    stand_in = Nokogiri::XML::Node.new('dummy', document)
    add_previous_sibling_node(stand_in)
    unlink
    return stand_in.replace(node_or_tags)
  end

  incoming = coerce(node_or_tags)
  unless incoming.is_a?(XML::NodeSet)
    replace_node(incoming)
    return incoming
  end

  # A NodeSet is inserted member by member ahead of this node, which is
  # then removed.
  incoming.each { |member| add_previous_sibling(member) }
  unlink
  incoming
end
####
# Swap this Node for +node_or_tags+
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a string containing markup.
#
# Performs the same replacement as +replace+, but returns self to support
# chaining of calls.
#
# Also see related method +replace+.
def swap node_or_tags
  tap { replace(node_or_tags) }
end
# Short navigation names for the sibling accessors.
alias :next :next_sibling
alias :previous :previous_sibling
# :stopdoc:
# HACK: This is to work around an RDoc bug
alias :next= :add_next_sibling
# :startdoc:
alias :previous= :add_previous_sibling
# Removing a node from its tree.
alias :remove :unlink
# Attribute access synonyms for #[] and #[]=.
alias :get_attribute :[]
alias :attr :[]
alias :set_attribute :[]=
# Plain-text content synonyms.
alias :text :content
alias :inner_text :content
alias :has_attribute? :key?
# Short names for the node name and node type accessors.
alias :name :node_name
alias :name= :node_name=
alias :type :node_type
# #to_str is an alias of #text, which is itself an alias of #content.
alias :to_str :text
alias :clone :dup
alias :elements :element_children
####
# Build a Hash of this node's attributes, keyed by attribute name
# (without any namespace) and valued by the Nokogiri::XML::Attr object.
# When two attributes share a name, the later one wins.
# If you need to distinguish attributes with the same name, with different namespaces
# use #attribute_nodes instead.
def attributes
  attribute_nodes.each_with_object({}) do |attr_node, by_name|
    by_name[attr_node.node_name] = attr_node
  end
end
###
# List the value of every attribute on this Node, in #attribute_nodes order.
def values
  attribute_nodes.map { |attr_node| attr_node.value }
end
###
# List the name of every attribute on this Node, in #attribute_nodes order.
def keys
  attribute_nodes.map { |attr_node| attr_node.node_name }
end
###
# Yield a [name, value] pair for each attribute on this Node. This is the
# iteration that backs the Enumerable mixin.
def each
  attribute_nodes.each do |attr_node|
    pair = [attr_node.node_name, attr_node.value]
    yield pair
  end
end
###
# Remove the attribute named +name+, if this node has one.
#
# Returns whatever removing the attribute returns, or nil when there is
# no such attribute. On JRuby the XPath context is cleared afterwards.
def remove_attribute name
  removed = key?(name) ? attributes[name].remove : nil
  clear_xpath_context if Nokogiri.jruby?
  removed
end
alias :delete :remove_attribute
###
# Returns true if this Node matches +selector+. The selector is evaluated
# from the last entry of #ancestors (the outermost one), and this node
# must be among the results.
# NOTE(review): for a node with no ancestors, +ancestors.last+ is
# presumably nil, in which case this raises NoMethodError -- confirm.
def matches? selector
  outermost = ancestors.last
  hits = outermost.search(selector)
  hits.include?(self)
end
###
# Create a DocumentFragment containing +tags+ that is relative to _this_
# context node. An HTML fragment is built when the owning document is
# HTML, an XML fragment otherwise.
def fragment tags
  flavor = if document.html?
    Nokogiri::HTML
  else
    Nokogiri::XML
  end
  flavor::DocumentFragment.new(document, tags, self)
end
###
# Parse +string_or_io+ as a document fragment within the context of
# *this* node. Returns a XML::NodeSet containing the nodes parsed from
# +string_or_io+.
#
# +string_or_io+ is read with #read when it responds to it, otherwise it
# is used as-is. +options+ may be nil (the HTML or XML default is chosen
# from the document type), an Integer bitmask, or a ParseOptions object.
# When a block is given, the options object is yielded before parsing.
def parse string_or_io, options = nil
  ##
  # When the current node is unparented and not an element node, use the
  # document as the parsing context instead. Otherwise, the in-context
  # parser cannot find an element or a document node.
  # Document Fragments are also not usable by the in-context parser.
  if !element? && !document? && (!parent || parent.fragment?)
    return document.parse(string_or_io, options)
  end
  options ||= (document.html? ? ParseOptions::DEFAULT_HTML : ParseOptions::DEFAULT_XML)
  # Test against Integer, not Fixnum: Fixnum was deprecated in Ruby 2.4 and
  # removed in 3.2, where referencing it raises NameError.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  # Give the options to the user
  yield options if block_given?
  contents = string_or_io.respond_to?(:read) ? string_or_io.read : string_or_io
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?
  ##
  # This is a horrible hack, but I don't care. See #313 for background.
  # If in-context parsing produced nothing but did add document errors,
  # and recovery is enabled, reparse the contents as an HTML fragment.
  error_count = document.errors.length
  node_set = in_context(contents, options.to_i)
  if node_set.empty? && document.errors.length > error_count && options.recover?
    fragment = Nokogiri::HTML::DocumentFragment.parse contents
    node_set = fragment.children
  end
  node_set
end
####
# Set the Node's content to a Text node containing +string+. The string
# (after #to_s) is passed through encode_special_chars, so it is XML
# escaped rather than interpreted as markup.
def content= string
  escaped = encode_special_chars(string.to_s)
  self.native_content = escaped
end
###
# Make +parent_node+ the parent of this Node by adding this Node to it
# as a child. Returns +parent_node+.
def parent= parent_node
  parent_node.tap { |target| target.add_child(self) }
end
###
# Returns a Hash of the namespaces in scope for this node -- those
# defined on the node itself or on any ancestor -- built from
# #namespace_scopes.
#
# Note that the keys in this hash are the XML attribute names that would
# be used to define each namespace, such as "xmlns:prefix", not just the
# prefix; the values are the namespace hrefs. A default namespace set on
# self will be included with key "xmlns". However, default namespaces set
# on an ancestor will NOT be, even if self has no explicit default
# namespace.
def namespaces
  namespace_scopes.each_with_object({}) do |scope, table|
    prefix = scope.prefix
    attr_name = prefix.nil? ? 'xmlns' : "xmlns:#{prefix}"
    table[attr_name] = scope.href
  end
end
# Is this node a Comment? True when the node type is COMMENT_NODE.
def comment?
  COMMENT_NODE == type
end
# Is this node a CDATA section? True when the node type is CDATA_SECTION_NODE.
def cdata?
  CDATA_SECTION_NODE == type
end
# Is this node an XML::Document node? True when the node type is DOCUMENT_NODE.
def xml?
  DOCUMENT_NODE == type
end
# Is this node an HTML::Document node? True when the node type is
# HTML_DOCUMENT_NODE.
def html?
  HTML_DOCUMENT_NODE == type
end
# Is this node a Document? Decided by class (XML::Document or a subclass)
# rather than by node type.
def document?
  XML::Document === self
end
# Is this node a ProcessingInstruction? True when the node type is PI_NODE.
def processing_instruction?
  PI_NODE == type
end
# Is this node a Text node? True when the node type is TEXT_NODE.
def text?
  TEXT_NODE == type
end
# Is this node a DocumentFragment? True when the node type is
# DOCUMENT_FRAG_NODE.
def fragment?
  DOCUMENT_FRAG_NODE == type
end
###
# Fetch the Nokogiri::HTML::ElementDescription for this node, looked up
# by the node's name. Returns nil when the owning document is an XML
# document, and on unknown tags.
def description
  document.xml? ? nil : Nokogiri::HTML::ElementDescription[name]
end
###
# Is this a read only node? True for notation, entity and entity
# declaration nodes.
def read_only?
  # According to gdome2, these are read-only node types
  case type
  when NOTATION_NODE, ENTITY_NODE, ENTITY_DECL then true
  else false
  end
end
# Is this node an Element? True when the node type is ELEMENT_NODE.
# Also available as #elem?.
def element?
  ELEMENT_NODE == type
end
alias :elem? :element?
###
# Turn this node in to a string. Nodes of an XML document are serialized
# with #to_xml, all others with #to_html.
def to_s
  if document.xml?
    to_xml
  else
    to_html
  end
end
# Get the inner_html for this node: the HTML of each of Node#children,
# concatenated. +args+ are forwarded to each child's #to_html.
def inner_html *args
  pieces = children.map do |child|
    child.to_html(*args)
  end
  pieces.join
end
# Get the path to this node as a CSS expression. The result is derived
# from #path: empty steps are dropped, positional predicates such as
# "[2]" become ":nth-of-type(2)", and the steps are joined with " > ".
def css_path
  steps = path.split('/').reject { |step| step.length == 0 }
  steps.map { |step| step.gsub(/\[(\d+)\]/, ':nth-of-type(\1)') }.join(' > ')
end
###
# Get a list of ancestor Node for this Node, nearest first. If +selector+
# is given, only the ancestors found by searching for +selector+ from the
# outermost ancestor are kept.
def ancestors selector = nil
  return NodeSet.new(document) unless respond_to?(:parent)
  return NodeSet.new(document) unless parent

  # Climb parent links until one is missing or cannot be followed.
  chain = [parent]
  while (tip = chain.last).respond_to?(:parent) && (upper = tip.parent)
    chain << upper
  end
  return NodeSet.new(document, chain) unless selector

  matched = chain.last.search(selector)
  NodeSet.new(document, chain.select { |node| matched.include?(node) })
end
###
# Adds a default namespace supplied as a string +url+ href, to self, by
# adding a namespace definition with a nil prefix.
# The consequence is as an xmlns attribute with supplied argument were
# present in parsed XML: when this node is serialized to XML an "xmlns"
# attribute will appear. See also #namespace and #namespace=
# NOTE(review): the previous wording said such a namespace "will now show
# up in #attributes", which reads like a typo for "not" -- confirm.
def default_namespace= url
  no_prefix = nil
  add_namespace_definition(no_prefix, url)
end
# #add_namespace is a shorter name for #add_namespace_definition.
alias :add_namespace :add_namespace_definition
###
# Set the default namespace on this node (as would be defined with an
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
# a Namespace added this way will NOT be serialized as an xmlns attribute
# for this node. You probably want #default_namespace= instead, or perhaps
# #add_namespace_definition with a nil prefix argument.
#
# A nil or false +ns+ is passed straight through. Raises TypeError when
# +ns+ is not a Nokogiri::XML::Namespace, and ArgumentError when it
# belongs to a different document.
def namespace= ns
  return set_namespace(ns) unless ns

  unless ns.is_a?(Nokogiri::XML::Namespace)
    raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
  end
  raise ArgumentError, 'namespace must be declared on the same document' if ns.document != document

  set_namespace(ns)
end
####
# Yields self and all children to +block+ recursively. Children are
# visited first, so each node is yielded after its whole subtree.
def traverse &block
  children.each do |child|
    child.traverse(&block)
  end
  block.call(self)
end
###
# Accept a visitor: calls "visit" on +visitor+ with self and returns the
# result.
def accept(visitor)
  visitor.visit self
end
###
# Test to see if this Node is equal to +other+. Two nodes are equal when
# their pointer_id values match; nil, false, and objects without a
# pointer_id are never equal to a Node.
def == other
  comparable = other && other.respond_to?(:pointer_id)
  return false unless comparable
  pointer_id == other.pointer_id
end
###
# Serialize Node using +options+. Save options can also be set using a
# block. See SaveOptions.
#
# These two statements are equivalent:
#
# node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
#
# or
#
# node.serialize(:encoding => 'UTF-8') do |config|
# config.format.as_xml
# end
#
# Instead of a Hash, the encoding and save options may be given as two
# positional arguments. When no encoding is given, the document's
# encoding is used. A Hash passed by the caller is never modified.
#
# Returns the serialized String.
def serialize *args, &block
  options = if args.first.is_a?(Hash)
    # Work on a copy: :encoding is written below and the caller's Hash
    # (which may be frozen or reused) must stay untouched.
    args.shift.dup
  else
    { :encoding => args[0], :save_with => args[1] }
  end
  encoding = options[:encoding] || document.encoding
  options[:encoding] = encoding
  # Output buffer, tagged with the target encoding when one is known.
  outstring = "".dup
  if encoding && outstring.respond_to?(:force_encoding)
    outstring.force_encoding(Encoding.find(encoding))
  end
  io = StringIO.new(outstring)
  write_to io, options, &block
  io.string
end
###
# Serialize this Node to HTML, using SaveOptions::DEFAULT_HTML unless
# +options+ already carries :save_with.
#
# doc.to_html
#
# See Node#write_to for a list of +options+. For formatted output,
# use Node#to_xhtml instead.
def to_html(options = {})
  to_format(SaveOptions::DEFAULT_HTML, options)
end
###
# Serialize this Node to XML using +options+. SaveOptions::DEFAULT_XML is
# used unless +options+ already carries :save_with. The Hash passed by
# the caller is not modified.
#
# doc.to_xml(:indent => 5, :encoding => 'UTF-8')
#
# See Node#write_to for a list of +options+
def to_xml options = {}
  # Copy before filling in the default so a frozen or reused Hash is safe.
  options = options.dup
  options[:save_with] ||= SaveOptions::DEFAULT_XML
  serialize(options)
end
###
# Serialize this Node to XHTML using +options+, with
# SaveOptions::DEFAULT_XHTML unless +options+ already carries :save_with.
#
# doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
#
# See Node#write_to for a list of +options+
def to_xhtml(options = {})
  to_format(SaveOptions::DEFAULT_XHTML, options)
end
###
# Write Node to +io+ with +options+. +options+ modify the output of
# this method. Valid options are:
#
# * +:encoding+ for changing the encoding
# * +:indent_text+ the indentation text, defaults to one space
# * +:indent+ the number of +:indent_text+ to use, defaults to 2
#   (0 on JRuby)
# * +:save_with+ a combination of SaveOptions constants, defaults to
#   SaveOptions::FORMAT (no default on JRuby)
#
# Instead of a Hash, the encoding and the save options may be passed as
# two positional arguments. When a block is given, the SaveOptions object
# is yielded before writing.
#
# To save with UTF-8 indented twice:
#
# node.write_to(io, :encoding => 'UTF-8', :indent => 2)
#
# To save indented with two dashes:
#
# node.write_to(io, :indent_text => '-', :indent => 2)
#
def write_to io, *options
  # Keep positional arguments apart from the options Hash. Indexing the
  # Hash with 0 / 1 (as was done before) always produced nil, so a
  # positional encoding or save_with was silently ignored.
  if options.first.is_a?(Hash)
    positional = []
    options = options.first
  else
    positional = options
    options = {}
  end
  encoding = options[:encoding] || positional[0]
  if Nokogiri.jruby?
    save_options = options[:save_with] || positional[1]
    indent_times = options[:indent] || 0
  else
    save_options = options[:save_with] || positional[1] || SaveOptions::FORMAT
    indent_times = options[:indent] || 2
  end
  indent_text = options[:indent_text] || ' '
  config = SaveOptions.new(save_options.to_i)
  yield config if block_given?
  native_write_to(io, encoding, indent_text * indent_times, config.options)
end
###
# Write Node as HTML to +io+ with +options+, using
# SaveOptions::DEFAULT_HTML unless +options+ already carries :save_with.
#
# See Node#write_to for a list of +options+
def write_html_to(io, options = {})
  write_format_to(SaveOptions::DEFAULT_HTML, io, options)
end
###
# Write Node as XHTML to +io+ with +options+, using
# SaveOptions::DEFAULT_XHTML unless +options+ already carries :save_with.
#
# See Node#write_to for a list of +options+
def write_xhtml_to(io, options = {})
  write_format_to(SaveOptions::DEFAULT_XHTML, io, options)
end
###
# Write Node as XML to +io+ with +options+. SaveOptions::DEFAULT_XML is
# used unless +options+ already carries :save_with. The Hash passed by
# the caller is not modified.
#
# doc.write_xml_to io, :encoding => 'UTF-8'
#
# See Node#write_to for a list of options
def write_xml_to io, options = {}
  # Copy before filling in the default so a frozen or reused Hash is safe.
  options = options.dup
  options[:save_with] ||= SaveOptions::DEFAULT_XML
  write_to io, options
end
###
# Compare two Node objects with respect to their Document. Returns nil
# when +other+ is not a Node or belongs to a different document, since
# such nodes cannot be compared; otherwise returns the result of
# +compare+.
def <=> other
  return nil unless other.is_a?(Nokogiri::XML::Node) && document == other.document
  compare(other)
end
###
# Do xinclude substitution on the subtree below node. If given a block, a
# Nokogiri::XML::ParseOptions object initialized from +options+, will be
# passed to it, allowing more convenient modification of the parser options.
#
# +options+ may be an Integer bitmask or a ParseOptions object.
def do_xinclude options = XML::ParseOptions::DEFAULT_XML, &block
  # Test against Integer, not Fixnum: Fixnum was deprecated in Ruby 2.4 and
  # removed in 3.2, where referencing it raises NameError.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  # give options to user
  yield options if block_given?
  # call c extension
  process_xincludes(options.to_i)
end
###
# Canonicalize the subtree rooted at this node by delegating to the
# document's canonicalize with +mode+, +inclusive_namespaces+ and
# +with_comments+. The block handed to the document keeps a node when it
# is this node or has this node among its ancestors; for block arguments
# that are not an XML::Node, the accompanying parent is tested instead.
def canonicalize(mode=XML::XML_C14N_1_0,inclusive_namespaces=nil,with_comments=false)
  subtree_root = self
  document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
    subject = if node.is_a?(XML::Node)
      node
    else
      parent
    end
    subject == subtree_root || subject.ancestors.include?(subtree_root)
  end
end
private
# Shared implementation of add_next_sibling and add_previous_sibling.
# +next_or_previous+ is :next to insert after this node; any other value
# inserts before it. Returns the coerced +node_or_tags+.
def add_sibling next_or_previous, node_or_tags
  # Inserting each member directly after the pivot reverses their order,
  # so a set going after the node is walked back to front.
  if next_or_previous == :next
    impl, iter = :add_next_sibling_node, :reverse_each
  else
    impl, iter = :add_previous_sibling_node, :each
  end

  incoming = coerce(node_or_tags)
  unless incoming.is_a?(XML::NodeSet)
    send(impl, incoming)
    return incoming
  end

  # For a text node, insert around a throwaway element instead of the text
  # node itself, and remove that element once the set is in place.
  pivot = self
  if text?
    pivot = Nokogiri::XML::Node.new('dummy', document)
    send(impl, pivot)
  end
  incoming.send(iter) { |member| pivot.send(impl, member) }
  pivot.unlink if text?
  incoming
end
# Serialize this node with +save_option+ as the :save_with value, unless
# +options+ already carries one. The Hash passed by the caller is not
# modified. On libxml 2.6.x the node is dumped with dump_html instead.
def to_format save_option, options
  # FIXME: this is a hack around broken libxml versions
  return dump_html if Nokogiri.uses_libxml? && %w[2 6] == LIBXML_VERSION.split('.')[0..1]
  # Copy before filling in the default so a frozen or reused Hash is safe.
  options = options.dup
  options[:save_with] ||= save_option
  serialize(options)
end
# Write this node to +io+ with +save_option+ as the :save_with value,
# unless +options+ already carries one. The Hash passed by the caller is
# not modified. On libxml 2.6.x the output of dump_html is appended to
# +io+ instead.
def write_format_to save_option, io, options
  # FIXME: this is a hack around broken libxml versions
  return (io << dump_html) if Nokogiri.uses_libxml? && %w[2 6] == LIBXML_VERSION.split('.')[0..1]
  # Copy before filling in the default so a frozen or reused Hash is safe.
  options = options.dup
  options[:save_with] ||= save_option
  write_to io, options
end
# Names of the attributes reported when this node is inspected.
def inspect_attributes
  %w[name namespace attribute_nodes children].map(&:to_sym)
end
# Normalize an argument given to the tree-editing methods:
# * a NodeSet is returned as-is
# * a DocumentFragment yields its children
# * a String is parsed as a fragment in this node's context and yields
#   that fragment's children
# * any other Node is returned as-is, except Document and Attr
# Everything else raises ArgumentError.
def coerce data # :nodoc:
  return data if data.is_a?(XML::NodeSet)
  return data.children if data.is_a?(XML::DocumentFragment)
  return fragment(data).children if data.is_a?(String)

  # Documents and attributes are Nodes but cannot be inserted into a tree.
  rejected = data.is_a?(Document) || data.is_a?(XML::Attr)
  return data if !rejected && data.is_a?(XML::Node)

  raise ArgumentError, <<-EOERR
Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
EOERR
end
# XPath context prefixes for searches on this node: only ".//".
def implied_xpath_contexts # :nodoc:
  %w[.//]
end
# Add +node+ as a child, then remove and set again every attribute of
# +node+ whose name contains a colon.
# NOTE(review): presumably this lets prefixed attributes pick up the
# namespaces in scope at the node's new position -- confirm.
def add_child_node_and_reparent_attrs node # :nodoc:
  add_child_node(node)
  prefixed = node.attribute_nodes.select { |candidate| candidate.name =~ /:/ }
  prefixed.each do |attr_node|
    attr_node.remove
    node[attr_node.name] = attr_node.value
  end
end
end
end
end