require 'stringio'
require 'nokogiri/xml/node/save_options'
module Nokogiri
module XML
####
# Nokogiri::XML::Node is your window to the fun filled world of dealing
# with XML and HTML tags. A Nokogiri::XML::Node may be treated similarly
# to a hash with regard to attributes. For example (from irb):
#
# irb(main):004:0> node
# => link
# irb(main):005:0> node['href']
# => "#foo"
# irb(main):006:0> node.keys
# => ["href", "id"]
# irb(main):007:0> node.values
# => ["#foo", "link"]
# irb(main):008:0> node['class'] = 'green'
# => "green"
# irb(main):009:0> node
# => link
# irb(main):010:0>
#
# See Nokogiri::XML::Node#[] and Nokogiri::XML::Node#[]= for more information.
#
# Nokogiri::XML::Node also has methods that let you move around your
# tree. For navigating your tree, see:
#
# * Nokogiri::XML::Node#parent
# * Nokogiri::XML::Node#children
# * Nokogiri::XML::Node#next
# * Nokogiri::XML::Node#previous
#
# You may search this node's subtree using Node#xpath and Node#css
class Node
include Nokogiri::XML::PP::Node
# Integer node type codes. The predicate methods in this class (element?,
# text?, cdata?, comment?, xml?, html?, read_only?) compare the value of
# +type+ against these constants.
# NOTE(review): the numbering presumably mirrors libxml2's xmlElementType
# enum -- confirm before changing any value.
# Element node type, see Nokogiri::XML::Node#element?
ELEMENT_NODE = 1
# Attribute node type
ATTRIBUTE_NODE = 2
# Text node type, see Nokogiri::XML::Node#text?
TEXT_NODE = 3
# CDATA node type, see Nokogiri::XML::Node#cdata?
CDATA_SECTION_NODE = 4
# Entity reference node type
ENTITY_REF_NODE = 5
# Entity node type
ENTITY_NODE = 6
# PI node type
PI_NODE = 7
# Comment node type, see Nokogiri::XML::Node#comment?
COMMENT_NODE = 8
# Document node type, see Nokogiri::XML::Node#xml?
DOCUMENT_NODE = 9
# Document type node type
DOCUMENT_TYPE_NODE = 10
# Document fragment node type
DOCUMENT_FRAG_NODE = 11
# Notation node type
NOTATION_NODE = 12
# HTML document node type, see Nokogiri::XML::Node#html?
HTML_DOCUMENT_NODE = 13
# DTD node type
DTD_NODE = 14
# Element declaration type
ELEMENT_DECL = 15
# Attribute declaration type
ATTRIBUTE_DECL = 16
# Entity declaration type
ENTITY_DECL = 17
# Namespace declaration type
NAMESPACE_DECL = 18
# XInclude start type
XINCLUDE_START = 19
# XInclude end type
XINCLUDE_END = 20
# DOCB document node type
DOCB_DOCUMENT_NODE = 21
def initialize name, document
# ... Ya. This is empty on purpose.
# Both +name+ and +document+ are accepted and ignored by this Ruby-level
# initializer.
# NOTE(review): presumably the node is actually built by a native .new that
# is not defined in this file -- confirm.
end
###
# Decorate this node with the decorators set up in this node's Document
def decorate!
  # The owning document holds the decorators; hand ourselves over to it.
  owner = document
  owner.decorate(self)
end
###
# Search this node for +paths+. +paths+ can be XPath or CSS, and an
# optional hash of namespaces may be appended.
# See Node#xpath and Node#css.
def search *paths
  # A trailing Hash is an explicit namespace map; otherwise fall back to the
  # namespaces declared on the document root (or none if there is no root).
  ns = if paths.last.is_a?(Hash)
    paths.pop
  else
    document.root ? document.root.namespaces : {}
  end
  # Anything starting with "./" or "/" is taken as XPath verbatim; every
  # other path is treated as CSS and translated to XPath.
  queries = paths.map do |path|
    expr = path.to_s
    if expr =~ /^(\.\/|\/)/
      expr
    else
      CSS.xpath_for(expr, :prefix => ".//", :ns => ns)
    end
  end
  xpath(*(queries.flatten.uniq + [ns]))
end
alias :/ :search
###
# Search this node for XPath +paths+. +paths+ must be one or more XPath
# queries. A hash of namespaces may be appended. For example:
#
# node.xpath('.//title')
# node.xpath('.//foo:name', { 'foo' => 'http://example.org/' })
# node.xpath('.//xmlns:name', node.root.namespaces)
#
# Custom XPath functions may also be defined. To define custom functions
# create a class and implement the function you want to define.
# For example:
#
# node.xpath('.//title[regex(., "\w+")]', Class.new {
# def regex node_set, regex
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
# end
# }.new)
#
def xpath *paths
  # Bail out first: everything below dereferences +document+, so the guard
  # has to run before the namespace lookup (it used to come after
  # `document.root`, which made it unreachable for a nil document).
  return NodeSet.new(document) unless document

  # Pop off our custom function handler if it exists: a trailing argument
  # that is not a Hash, String or Symbol is taken to be the handler.
  handler = ![
    Hash, String, Symbol
  ].include?(paths.last.class) ? paths.pop : nil

  # A trailing Hash is the namespace map; default to the root's namespaces.
  ns = paths.last.is_a?(Hash) ? paths.pop :
    (document.root ? document.root.namespaces : {})

  # Evaluate every path in its own context, tagging and decorating each
  # resulting set with the owning document.
  sets = paths.map { |path|
    ctx = XPathContext.new(self)
    ctx.register_namespaces(ns)
    set = ctx.evaluate(path, handler).node_set
    set.document = document
    document.decorate(set)
    set
  }
  return sets.first if sets.length == 1

  # Several paths: concatenate the individual results into one NodeSet.
  NodeSet.new(document) do |combined|
    document.decorate(combined)
    sets.each do |set|
      set.each do |node|
        combined << node
      end
    end
  end
end
###
# Search this node for CSS +rules+. +rules+ must be one or more CSS
# selectors. For example:
#
# node.css('title')
# node.css('body h1.bold')
# node.css('div + p.green', 'div#one')
#
# Custom CSS pseudo classes may also be defined. To define custom pseudo
# classes, create a class and implement the custom pseudo class you
# want defined. The first argument to the method will be the current
# matching NodeSet. Any other arguments are ones that you pass in.
# For example:
#
# node.css('title:regex("\w+")', Class.new {
# def regex node_set, regex
# node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
# end
# }.new)
#
def css *rules
  # A trailing argument that is not a Hash, String or Symbol is the custom
  # pseudo-class handler.
  handler = [Hash, String, Symbol].include?(rules.last.class) ? nil : rules.pop
  # A trailing Hash is the namespace map; default to the root's namespaces.
  ns = if rules.last.is_a?(Hash)
    rules.pop
  else
    document.root ? document.root.namespaces : {}
  end
  # Translate each selector to XPath, then forward namespaces and handler
  # (when present) as trailing arguments to #xpath.
  queries = rules.map { |rule|
    CSS.xpath_for(rule, :prefix => ".//", :ns => ns)
  }.flatten.uniq
  queries << ns unless ns.nil?
  queries << handler unless handler.nil?
  xpath(*queries)
end
###
# Search this node's immediate children using CSS selector +selector+
def > selector
  # Fall back to an empty namespace map when the document has no root,
  # matching what search, xpath, css and at do; calling
  # document.root.namespaces unconditionally raised NoMethodError here.
  root = document.root
  ns = root ? root.namespaces : {}
  # The "./" prefix restricts the match to direct children; only the first
  # translated expression is evaluated.
  xpath CSS.xpath_for(selector, :prefix => "./", :ns => ns).first
end
###
# Search for the first occurrence of +path+.
# Returns nil if nothing is found, otherwise a Node.
def at path, ns = document.root ? document.root.namespaces : {}
  # Delegate to #search and keep only the first hit (nil when empty).
  matches = search(path, ns)
  matches.first
end
alias :% :at
##
# Search this node for the first occurrence of XPath +paths+.
# Equivalent to xpath(paths).first
# See Node#xpath for more information.
#
def at_xpath *paths
  # First node of the XPath result, or nil when nothing matched.
  found = xpath(*paths)
  found.first
end
##
# Search this node for the first occurrence of CSS +rules+.
# Equivalent to css(rules).first
# See Node#css for more information.
#
def at_css *rules
  # First node of the CSS result, or nil when nothing matched.
  found = css(*rules)
  found.first
end
###
# Get the attribute value for the attribute +name+
def [] name
  # Attribute names are looked up as Strings; a missing attribute gives nil.
  attr_name = name.to_s
  key?(attr_name) ? get(attr_name) : nil
end
###
# Add +node+ as a child of this Node.
# The new node must be a Nokogiri::XML::Node or a non-empty String.
# Returns the new child node.
def add_child(node)
  Node.verify_nodeishness(node)
  # Ordinary nodes are attached directly.
  return add_child_node(node) unless node.type == DOCUMENT_FRAG_NODE
  # A fragment is only a container: attach each of its children instead.
  node.children.each { |child| add_child_node child }
end
###
# Insert +node+ before this Node (as a sibling).
def add_previous_sibling(node)
  Node.verify_nodeishness(node)
  # Ordinary nodes are inserted directly.
  return add_previous_sibling_node(node) unless node.type == DOCUMENT_FRAG_NODE
  # For a fragment, insert its children one by one in document order.
  node.children.each { |child| add_previous_sibling_node child }
end
###
# Insert +node+ after this Node (as a sibling).
def add_next_sibling(node)
  Node.verify_nodeishness(node)
  # Ordinary nodes are inserted directly.
  return add_next_sibling_node(node) unless node.type == DOCUMENT_FRAG_NODE
  # Each child is inserted right after this node, so walk the fragment's
  # children back to front to keep them in their original order.
  node.children.reverse.each { |child| add_next_sibling_node child }
end
# Convenience aliases. Several targets (next_sibling, previous_sibling,
# unlink, []=, content, key?, node_name, node_name=, node_type, dup) are not
# defined in this file.
# NOTE(review): presumably they come from the native extension -- confirm.
alias :next :next_sibling
alias :previous :previous_sibling
# :stopdoc:
# HACK: This is to work around an RDoc bug
alias :next= :add_next_sibling
# :startdoc:
alias :previous= :add_previous_sibling
alias :remove :unlink
# Attribute access under several names; all read/write through [] and []=.
alias :get_attribute :[]
alias :attr :[]
alias :set_attribute :[]=
alias :text :content
alias :inner_text :content
alias :has_attribute? :key?
alias :<< :add_child
alias :name :node_name
alias :name= :node_name=
alias :type :node_type
# to_str is an alias of text, which is itself an alias of content.
alias :to_str :text
alias :clone :dup
####
# Returns a hash containing the node's attributes. The key is
# the attribute name, the value is a Nokogiri::XML::Attr
# representing the attribute.
def attributes
  # Build a name => attribute-node map; a later attribute with the same
  # name replaces an earlier one.
  attribute_nodes.inject({}) do |by_name, attr|
    by_name[attr.node_name] = attr
    by_name
  end
end
###
# Get the attribute values for this Node.
def values
  # One value per attribute node, in attribute order.
  attribute_nodes.collect do |attr|
    attr.value
  end
end
###
# Get the attribute names for this Node.
def keys
  # One name per attribute node, in attribute order.
  attribute_nodes.collect do |attr|
    attr.node_name
  end
end
###
# Iterate over each attribute name and value pair for this Node.
def each &block
  # Hand every attribute to the block as a (name, value) pair.
  attribute_nodes.each do |attr|
    attr_name = attr.node_name
    attr_value = attr.value
    block.call(attr_name, attr_value)
  end
end
###
# Remove the attribute named +name+
def remove_attribute name
  # Nothing to do (and nil returned) when the attribute is absent.
  return unless key?(name)
  attributes[name].remove
end
alias :delete :remove_attribute
###
# Returns true if this Node matches +selector+
def matches? selector
  # Run the selector from the outermost ancestor and check whether this
  # node is among the results.
  topmost = ancestors.last
  hits = topmost.search(selector)
  hits.include?(self)
end
####
# Create nodes from +data+ and insert them before this node
# (as a sibling).
def before data
  # Parse +data+ into a fragment and insert its children, in order, ahead
  # of this node.
  new_nodes = fragment(data).children
  new_nodes.each { |node| add_previous_sibling(node) }
  self
end
####
# Create nodes from +data+ and insert them after this node
# (as a sibling).
def after data
  # Each node goes immediately after this one, so insert back to front to
  # preserve the fragment's order.
  new_nodes = fragment(data).children.to_a
  new_nodes.reverse.each { |node| add_next_sibling(node) }
  self
end
####
# Swap this Node for new nodes made from +data+
def swap data
  # Put the replacement nodes in front of this node first ...
  before data
  # ... then detach this node, leaving only the replacements in the tree.
  remove
  self
end
####
# Set the inner_html for this Node to +tags+
def inner_html= tags
  # Drop the current children ...
  children.each do |child|
    child.remove
  end
  # ... then append the nodes parsed from +tags+, in order.
  replacements = fragment(tags).children.to_a
  replacements.each { |node| add_child node }
  self
end
def fragment tags # :nodoc:
  # TODO: deprecate?
  # Fragment creation is delegated to the owning document.
  owner = document
  owner.fragment(tags)
end
####
# Set the Node content to +string+. The content gets XML escaped.
def content= string
  # Coerce to a String and escape it before storing it as native content.
  escaped = encode_special_chars(string.to_s)
  self.native_content = escaped
end
###
# Set the parent Node for this Node
def parent= parent_node
  # Re-parenting is simply "add me as a child of the new parent".
  parent_node.add_child self
  parent_node
end
###
# Get a hash containing the Namespace definitions for this Node
def namespaces
  namespace_definitions.inject({}) do |defs, nd|
    # "xmlns" for the default namespace (nil prefix), "xmlns:prefix" otherwise.
    key = ['xmlns', nd.prefix].compact.join(':')
    if RUBY_VERSION >= '1.9' && document.encoding
      begin
        # Tag the key with the document's encoding; an encoding name Ruby
        # does not know is silently ignored.
        key.force_encoding document.encoding
      rescue ArgumentError
      end
    end
    defs[key] = nd.href
    defs
  end
end
# Returns true if this is a Comment
def comment?
  COMMENT_NODE == type
end
# Returns true if this is a CDATA
def cdata?
  CDATA_SECTION_NODE == type
end
# Returns true if this is an XML::Document node
def xml?
  DOCUMENT_NODE == type
end
# Returns true if this is an HTML::Document node
def html?
  HTML_DOCUMENT_NODE == type
end
# Returns true if this is a Text node
def text?
  TEXT_NODE == type
end
###
# Fetch the Nokogiri::HTML::ElementDescription for this node. Returns
# nil on XML documents and on unknown tags.
def description
  # XML documents have no element descriptions; for everything else look
  # the tag name up in the HTML element table.
  document.xml? ? nil : Nokogiri::HTML::ElementDescription[name]
end
###
# Is this a read only node?
def read_only?
  # According to gdome2, these are read-only node types
  case type
  when NOTATION_NODE, ENTITY_NODE, ENTITY_DECL then true
  else false
  end
end
# Returns true if this is an Element node
def element?
  ELEMENT_NODE == type
end
alias :elem? :element?
###
# Turn this node in to a string. If the document is HTML, this method
# returns html. If the document is XML, this method returns XML.
def to_s
document.xml? ? to_xml : to_html
end
# Get the inner_html for this node's Node#children
def inner_html *args
  # Serialize every child with the same arguments and glue the pieces
  # together without a separator.
  pieces = children.map do |child|
    child.to_html(*args)
  end
  pieces.join
end
# Get the path to this node as a CSS expression
def css_path
  # Split the XPath-style path into steps, dropping the empty piece that a
  # leading "/" produces.
  steps = path.split(/\//).reject { |part| part.length == 0 }
  # Positional predicates such as "[2]" become ":nth-of-type(2)".
  selectors = steps.map { |part| part.gsub(/\[(\d+)\]/, ':nth-of-type(\1)') }
  selectors.join(' > ')
end
###
# Get a list of ancestor Node for this Node. If +selector+ is given,
# the ancestors must match +selector+
def ancestors selector = nil
  # No parent accessor, or no parent: the result is an empty set.
  return NodeSet.new(document) unless respond_to?(:parent)
  return NodeSet.new(document) unless parent

  # Walk upwards, collecting each parent until one has no parent (or does
  # not respond to #parent at all). The list is ordered nearest-first.
  parents = [parent]
  while parents.last.respond_to?(:parent)
    break unless ctx_parent = parents.last.parent
    parents << ctx_parent
  end
  return NodeSet.new(document, parents) unless selector

  # Run the selector once from the outermost ancestor and filter against
  # that single result; previously the same search was repeated for every
  # ancestor in the chain.
  matching = parents.last.search(selector)
  NodeSet.new(document, parents.find_all { |candidate|
    matching.include?(candidate)
  })
end
###
# Set the default namespace for this node to +url+
def default_namespace= url
  # A nil prefix declares the default (unprefixed) namespace.
  prefix = nil
  add_namespace_definition(prefix, url)
end
# add_namespace(prefix, href) is a shorter name for add_namespace_definition.
# NOTE(review): add_namespace_definition is not defined in this file;
# presumably it comes from the native extension -- confirm.
alias :add_namespace :add_namespace_definition
###
# Set the namespace for this node to +ns+
def namespace= ns
  # Check the type first: the document comparison below calls ns.document,
  # which would raise NoMethodError on an arbitrary object before the
  # intended TypeError could ever be reported.
  unless ns.is_a? Nokogiri::XML::Namespace
    raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
  end
  # A namespace can only be applied to nodes of the document it was
  # declared on.
  if ns.document != document
    raise ArgumentError, 'namespace must be declared on the same document'
  end
  set_namespace ns
end
####
# Yields self and all children to +block+ recursively.
def traverse &block
  # Depth-first: descend into every child before visiting this node.
  children.each do |child|
    child.traverse(&block)
  end
  block.call(self)
end
###
# Accept a visitor. This method calls "visit" on +visitor+ with self.
def accept visitor
  # Double dispatch: the visitor decides what to do with this node.
  visitor.visit self
end
####
# +replace+ this Node with the +node+ in the Document.
# The new node must be a Nokogiri::XML::Node or a non-empty String.
# Returns the new child node.
def replace node
  Node.verify_nodeishness(node)
  # An ordinary node is swapped in directly.
  return replace_node(node) unless node.type == DOCUMENT_FRAG_NODE
  # A fragment is expanded: its children are placed in front of this node,
  # which is then detached.
  node.children.each { |child| add_previous_sibling child }
  unlink
end
###
# Test to see if this Node is equal to +other+
def == other
  # Only objects exposing a pointer_id can be equal to a node; nil, false
  # and everything else compare unequal.
  if other && other.respond_to?(:pointer_id)
    pointer_id == other.pointer_id
  else
    false
  end
end
###
# Serialize Node using +options+. Save options can also be set using a
# block. See SaveOptions.
#
# These two statements are equivalent:
#
# node.serialize(:encoding => 'UTF-8', :save_with => FORMAT | AS_XML)
#
# or
#
# node.serialize(:encoding => 'UTF-8') do |config|
# config.format.as_xml
# end
#
def serialize *args, &block
  # Accept either an options Hash or the positional (encoding, save_with)
  # form, normalising the latter into a Hash.
  options = if args.first.is_a?(Hash)
    args.shift
  else
    {
      :encoding => args[0],
      :save_with => args[1] || SaveOptions::FORMAT
    }
  end
  encoding = options[:encoding] || document.encoding
  # Tag the output buffer with the target encoding where Strings support it.
  buffer = ""
  if encoding && buffer.respond_to?(:force_encoding)
    buffer.force_encoding(Encoding.find(encoding))
  end
  io = StringIO.new(buffer)
  write_to io, options, &block
  io.string
end
###
# Serialize this Node to HTML
#
# doc.to_html
#
# See Node#write_to for a list of +options+. For formatted output,
# use Node#to_xhtml instead.
def to_html options = {}
  # FIXME: this is a hack around broken libxml versions
  return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
  html_defaults = SaveOptions::FORMAT |
                  SaveOptions::NO_DECLARATION |
                  SaveOptions::NO_EMPTY_TAGS |
                  SaveOptions::AS_HTML
  # Build a fresh Hash instead of writing :save_with into the caller's
  # options: a Hash reused for a later to_xml/to_xhtml call would otherwise
  # carry the HTML flags along.
  serialize(options.merge(:save_with => options[:save_with] || html_defaults))
end
###
# Serialize this Node to XML using +options+
#
# doc.to_xml(:indent => 5, :encoding => 'UTF-8')
#
# See Node#write_to for a list of +options+
def to_xml options = {}
  xml_defaults = SaveOptions::FORMAT | SaveOptions::AS_XML
  # Build a fresh Hash instead of writing :save_with into the caller's
  # options, so a reused Hash does not carry the XML flags into a later
  # to_html/to_xhtml call. (An unused `encoding = nil` local was dropped.)
  serialize(options.merge(:save_with => options[:save_with] || xml_defaults))
end
###
# Serialize this Node to XHTML using +options+
#
# doc.to_xhtml(:indent => 5, :encoding => 'UTF-8')
#
# See Node#write_to for a list of +options+
def to_xhtml options = {}
  # FIXME: this is a hack around broken libxml versions
  return dump_html if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
  xhtml_defaults = SaveOptions::FORMAT |
                   SaveOptions::NO_DECLARATION |
                   SaveOptions::NO_EMPTY_TAGS |
                   SaveOptions::AS_XHTML
  # Build a fresh Hash instead of writing :save_with into the caller's
  # options, so a reused Hash does not carry the XHTML flags into a later
  # to_html/to_xml call.
  serialize(options.merge(:save_with => options[:save_with] || xhtml_defaults))
end
###
# Write Node to +io+ with +options+. +options+ modify the output of
# this method. Valid options are:
#
# * +:encoding+ for changing the encoding
# * +:indent_text+ the indentation text, defaults to one space
# * +:indent+ the number of +:indent_text+ to use, defaults to 2
# * +:save_with+ a combination of SaveOptions constants.
#
# To save with UTF-8 indented twice:
#
# node.write_to(io, :encoding => 'UTF-8', :indent => 2)
#
# To save indented with two dashes:
#
# node.write_to(io, :indent_text => '-', :indent => 2)
#
def write_to io, *options
  # Two calling conventions: an options Hash, or the legacy positional form
  # write_to(io, encoding, save_options). Keep the positional arguments in
  # their own variable: the old code replaced +options+ with {} and then
  # read options[0] / options[1] from that empty Hash, so positional
  # arguments were silently ignored.
  if options.first.is_a?(Hash)
    positional = []
    options = options.first
  else
    positional = options
    options = {}
  end
  encoding = options[:encoding] || positional[0]
  save_options = options[:save_with] || positional[1] || SaveOptions::FORMAT
  indent_text = options[:indent_text] || ' '
  indent_times = options[:indent] || 2
  config = SaveOptions.new(save_options)
  # Let the caller adjust the save flags through the block form.
  yield config if block_given?
  native_write_to(io, encoding, indent_text * indent_times, config.options)
end
###
# Write Node as HTML to +io+ with +options+
#
# See Node#write_to for a list of +options+
def write_html_to io, options = {}
  # FIXME: this is a hack around broken libxml versions
  return (io << dump_html) if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
  html_defaults = SaveOptions::FORMAT |
                  SaveOptions::NO_DECLARATION |
                  SaveOptions::NO_EMPTY_TAGS |
                  SaveOptions::AS_HTML
  # Build a fresh Hash instead of writing :save_with into the caller's
  # options, so a reused Hash does not carry the HTML flags into a later
  # write_xml_to/write_xhtml_to call.
  write_to io, options.merge(:save_with => options[:save_with] || html_defaults)
end
###
# Write Node as XHTML to +io+ with +options+
#
# See Node#write_to for a list of +options+
def write_xhtml_to io, options = {}
  # FIXME: this is a hack around broken libxml versions
  return (io << dump_html) if %w[2 6] === LIBXML_VERSION.split('.')[0..1]
  xhtml_defaults = SaveOptions::FORMAT |
                   SaveOptions::NO_DECLARATION |
                   SaveOptions::NO_EMPTY_TAGS |
                   SaveOptions::AS_XHTML
  # Build a fresh Hash instead of writing :save_with into the caller's
  # options, so a reused Hash does not carry the XHTML flags into a later
  # write_html_to/write_xml_to call.
  write_to io, options.merge(:save_with => options[:save_with] || xhtml_defaults)
end
###
# Write Node as XML to +io+ with +options+
#
# doc.write_xml_to io, :encoding => 'UTF-8'
#
# See Node#write_to for a list of options
def write_xml_to io, options = {}
  xml_defaults = SaveOptions::FORMAT | SaveOptions::AS_XML
  # Build a fresh Hash instead of writing :save_with into the caller's
  # options, so a reused Hash does not carry the XML flags into a later
  # write_html_to/write_xhtml_to call.
  write_to io, options.merge(:save_with => options[:save_with] || xml_defaults)
end
###
# Compare two Node objects with respect to their Document. Nodes from
# different documents cannot be compared.
def <=> other
  # Ordering is only defined between nodes of the same document; anything
  # else is incomparable (nil).
  comparable = other.is_a?(Nokogiri::XML::Node) && document == other.document
  comparable ? compare(other) : nil
end
# Instance methods defined with a plain `def` from here on are private.
# A bare `private` does not change the visibility of singleton methods
# written as `def self.name`, so Node.verify_nodeishness (called with an
# explicit receiver by the public methods above) remains callable.
private
def self.verify_nodeishness(node)
  # Accept any XML::Node except a Document; everything else is rejected.
  return if node.is_a?(XML::Node) && !node.is_a?(Document)
  raise ArgumentError, <<-EOERR
Node.replace requires a Node argument, and cannot accept a Document.
(You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
  EOERR
end
def inspect_attributes
  # Names of the readers reported for this node, in display order.
  %w[name namespace attribute_nodes children].map { |attr| attr.to_sym }
end
end
end
end