# encoding: utf-8
# frozen_string_literal: true
require "stringio"
module Nokogiri
module XML
# Nokogiri::XML::Node is the primary API you'll use to interact with your Document.
#
# == Attributes
#
# A Nokogiri::XML::Node may be treated similarly to a hash with regard to attributes. For
# example:
#
#   node = Nokogiri::XML::DocumentFragment.parse("<a href='#foo' id='link'>link</a>").at_css("a")
#   node.to_html # => "<a href=\"#foo\" id=\"link\">link</a>"
#   node['href'] # => "#foo"
#   node.keys # => ["href", "id"]
#   node.values # => ["#foo", "link"]
#   node['class'] = 'green' # => "green"
#   node.to_html # => "<a href=\"#foo\" id=\"link\" class=\"green\">link</a>"
#
# See the method group entitled Node@Working+With+Node+Attributes for the full set of methods.
#
# == Navigation
#
# Nokogiri::XML::Node also has methods that let you move around your tree:
#
# [#parent, #children, #next, #previous]
# Navigate up, down, or through siblings.
#
# See the method group entitled Node@Traversing+Document+Structure for the full set of methods.
#
# == Serialization
#
# When printing or otherwise emitting a document or a node (and its subtree), there are a few
# methods you might want to use:
#
# [#content, #text, #inner_text, #to_str]
# These methods will all **emit plaintext**,
# meaning that entities will be replaced (e.g., +&lt;+ will be replaced with +<+), meaning
# that any sanitizing will likely be un-done in the output.
#
# [#to_s, #to_xml, #to_html, #inner_html]
# These methods will all **emit properly-escaped markup**, meaning that it's suitable for
# consumption by browsers, parsers, etc.
#
# See the method group entitled Node@Serialization+and+Generating+Output for the full set of methods.
#
# == Searching
#
# You may search this node's subtree using methods like #xpath and #css.
#
# See the method group entitled Node@Searching+via+XPath+or+CSS+Queries for the full set of methods.
#
class Node
include Nokogiri::XML::PP::Node
include Nokogiri::XML::Searchable
include Nokogiri::ClassResolver
include Enumerable
# Element node type, see Nokogiri::XML::Node#element?
ELEMENT_NODE = 1
# Attribute node type
ATTRIBUTE_NODE = 2
# Text node type, see Nokogiri::XML::Node#text?
TEXT_NODE = 3
# CDATA node type, see Nokogiri::XML::Node#cdata?
CDATA_SECTION_NODE = 4
# Entity reference node type
ENTITY_REF_NODE = 5
# Entity node type
ENTITY_NODE = 6
# Processing instruction (PI) node type, see Nokogiri::XML::Node#processing_instruction?
PI_NODE = 7
# Comment node type, see Nokogiri::XML::Node#comment?
COMMENT_NODE = 8
# Document node type, see Nokogiri::XML::Node#xml? and Nokogiri::XML::Node#document?
DOCUMENT_NODE = 9
# Document type node type
DOCUMENT_TYPE_NODE = 10
# Document fragment node type, see Nokogiri::XML::Node#fragment?
DOCUMENT_FRAG_NODE = 11
# Notation node type
NOTATION_NODE = 12
# HTML document node type, see Nokogiri::XML::Node#html?
HTML_DOCUMENT_NODE = 13
# DTD node type
DTD_NODE = 14
# Element declaration type
ELEMENT_DECL = 15
# Attribute declaration type
ATTRIBUTE_DECL = 16
# Entity declaration type
ENTITY_DECL = 17
# Namespace declaration type
NAMESPACE_DECL = 18
# XInclude start type
XINCLUDE_START = 19
# XInclude end type
XINCLUDE_END = 20
# DOCB document node type
DOCB_DOCUMENT_NODE = 21
#
# :call-seq:
# new(name, document) -> Nokogiri::XML::Node
# new(name, document) { |node| ... } -> Nokogiri::XML::Node
#
# Create a new node with +name+ that belongs to +document+.
#
# If you intend to add a node to a document tree, it's likely that you will prefer one of the
# Nokogiri::XML::Node methods like #add_child, #add_next_sibling, #replace, etc. which will
# both create an element (or subtree) and place it in the document tree.
#
# Another alternative, if you are concerned about performance, is
# Nokogiri::XML::Document#create_element which accepts additional arguments for contents or
# attributes but (like this method) avoids parsing markup.
#
# [Parameters]
# - +name+ (String)
# - +document+ (Nokogiri::XML::Document) The document to which the returned node will belong.
# [Yields] Nokogiri::XML::Node
# [Returns] Nokogiri::XML::Node
#
def initialize(name, document)
# This is intentionally empty, and sets the method signature for subclasses.
# NOTE(review): neither +name+ nor +document+ is used here, so the node is presumably
# built elsewhere (e.g. by the native extension's allocator) — confirm.
end
###
# Decorate this node with the decorators set up in this node's Document
def decorate!
# Pure delegation: the owning document is asked to decorate this node.
document.decorate(self)
end
# :section: Manipulating Document Structure
###
# Add +node_or_tags+ as a child of this Node.
#
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
# containing markup.
#
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
# a DocumentFragment, NodeSet, or String).
#
# Also see related method +<<+.
def add_child(node_or_tags)
  # Normalise the argument first; the coerced value is also what gets returned.
  coerced = coerce(node_or_tags)

  # Treat a lone node as a one-element list so both shapes share one insertion loop.
  pending = coerced.is_a?(XML::NodeSet) ? coerced : [coerced]
  pending.each do |member|
    add_child_node_and_reparent_attrs(member)
  end

  coerced
end
###
# Add +node_or_tags+ as the first child of this Node.
#
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
# containing markup.
#
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
# a DocumentFragment, NodeSet, or String).
#
# Also see related method +add_child+.
def prepend_child(node_or_tags)
  head = children.first
  # With no existing children, prepending is the same as appending.
  return add_child(node_or_tags) unless head

  # Mimic the error add_child would raise: only comments and processing instructions
  # are let through when the receiver is a document.
  if document? && !node_or_tags.comment? && !node_or_tags.processing_instruction?
    raise "Document already has a root node"
  end

  # add_sibling is invoked through __send__, as it is not called with a plain receiver here.
  head.__send__(:add_sibling, :previous, node_or_tags)
end
# :call-seq:
# wrap(markup) -> self
# wrap(node) -> self
#
# Wrap this Node with the node parsed from +markup+ or a dup of the +node+.
#
# [Parameters]
# - *markup* (String)
# Markup that is parsed and used as the wrapper. This node's parent, if it exists, is used
# as the context node for parsing; otherwise the associated document is used. If the parsed
# fragment has multiple roots, the first root node is used as the wrapper.
# - *node* (Nokogiri::XML::Node)
# An element that is `#dup`ed and used as the wrapper.
#
# [Returns] +self+, to support chaining.
#
# Also see NodeSet#wrap
#
# *Example* with a +String+ argument:
#
# doc = Nokogiri::HTML5(<<~HTML)
#
# asdf
#
# HTML
# doc.at_css("a").wrap("")
# doc.to_html
# # =>
# #
# #
#
# *Example* with a +Node+ argument:
#
# doc = Nokogiri::HTML5(<<~HTML)
#
# asdf
#
# HTML
# doc.at_css("a").wrap(doc.create_element("div"))
# doc.to_html
# #
# #
# #
#
def wrap(node_or_tags)
  wrapper =
    case node_or_tags
    when String
      # Markup is parsed relative to the parent when there is one, otherwise the document.
      context = parent || document
      candidate = context.coerce(node_or_tags).first
      if candidate.nil?
        raise "Failed to parse '#{node_or_tags}' in the context of a '#{context.name}' element"
      end
      candidate
    when XML::Node
      # A node argument is never moved; a copy of it becomes the wrapper.
      node_or_tags.dup
    else
      raise ArgumentError, "Requires a String or Node argument, and cannot accept a #{node_or_tags.class}"
    end

  # Put the wrapper next to this node when it has a parent; otherwise just detach the wrapper.
  parent ? add_next_sibling(wrapper) : wrapper.unlink

  # Finally move this node inside the wrapper and return self for chaining.
  wrapper.add_child(self)
  self
end
###
# Add +node_or_tags+ as a child of this Node.
#
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
# containing markup.
#
# Returns +self+, to support chaining of calls (e.g., root << child1 << child2)
#
# Also see related method +add_child+.
def <<(node_or_tags)
# The insertion itself is delegated to #add_child; its return value is discarded ...
add_child(node_or_tags)
# ... and the receiver is returned instead, so calls can be chained.
self
end
###
# Insert +node_or_tags+ before this Node (as a sibling).
#
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
# containing markup.
#
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
# a DocumentFragment, NodeSet, or String).
#
# Also see related method +before+.
def add_previous_sibling(node_or_tags)
  # parent may be nil, in which case the root-node restriction does not apply.
  under_document = parent&.document?

  # Directly below a document, only comments and processing instructions may be added.
  if under_document && !node_or_tags.comment? && !node_or_tags.processing_instruction?
    raise ArgumentError, "A document may not have multiple root nodes."
  end

  add_sibling(:previous, node_or_tags)
end
###
# Insert +node_or_tags+ after this Node (as a sibling).
#
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
# containing markup.
#
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
# a DocumentFragment, NodeSet, or String).
#
# Also see related method +after+.
def add_next_sibling(node_or_tags)
  # A nil parent short-circuits the check through safe navigation.
  if parent&.document?
    # A document accepts extra top-level comments and processing instructions only.
    permitted = node_or_tags.comment? || node_or_tags.processing_instruction?
    raise ArgumentError, "A document may not have multiple root nodes." unless permitted
  end

  add_sibling(:next, node_or_tags)
end
####
# Insert +node_or_tags+ before this node (as a sibling).
#
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
# containing markup.
#
# Returns +self+, to support chaining of calls.
#
# Also see related method +add_previous_sibling+.
def before(node_or_tags)
# Same insertion as #add_previous_sibling; only the return value differs.
add_previous_sibling(node_or_tags)
# Return the receiver (not the inserted node) so calls can be chained.
self
end
####
# Insert +node_or_tags+ after this node (as a sibling).
#
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String
# containing markup.
#
# Returns +self+, to support chaining of calls.
#
# Also see related method +add_next_sibling+.
def after(node_or_tags)
# Same insertion as #add_next_sibling; only the return value differs.
add_next_sibling(node_or_tags)
# Return the receiver (not the inserted node) so calls can be chained.
self
end
####
# Set the content for this Node to +node_or_tags+.
#
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String
# containing markup.
#
# ⚠ Please note that despite the name, this method will *not* always parse a String argument
# as HTML. A String argument will be parsed with the +DocumentFragment+ parser related to this
# node's document.
#
# For example, if the document is an HTML4::Document then the string will be parsed as HTML4
# using HTML4::DocumentFragment; but if the document is an XML::Document then it will
# parse the string as XML using XML::DocumentFragment.
#
# Also see related method +children=+
def inner_html=(node_or_tags)
# Pure delegation: the children= writer performs the actual replacement.
self.children = node_or_tags
end
####
# Set the content for this Node +node_or_tags+
#
# +node_or_tags+ can be a Nokogiri::XML::Node, a Nokogiri::XML::DocumentFragment, or a String
# containing markup.
#
# Also see related method +inner_html=+
def children=(node_or_tags)
  # Coerce before unlinking, so the current children are still attached while coercing.
  replacement = coerce(node_or_tags)
  children.unlink

  # A single node is attached directly; a NodeSet is attached member by member.
  return add_child_node_and_reparent_attrs(replacement) unless replacement.is_a?(XML::NodeSet)

  replacement.each { |child| add_child_node_and_reparent_attrs(child) }
end
####
# Replace this Node with +node_or_tags+.
#
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
# containing markup.
#
# Returns the reparented node (if +node_or_tags+ is a Node), or NodeSet (if +node_or_tags+ is
# a DocumentFragment, NodeSet, or String).
#
# Also see related method +swap+.
def replace(node_or_tags)
  raise("Cannot replace a node with no parent") unless parent

  # We cannot replace a text node directly, otherwise libxml will return
  # an internal error at parser.c:13031 (issue #775). Work around it by putting a
  # throwaway "dummy" node in front of this text node, removing the text node, and
  # replacing the placeholder instead.
  if text?
    placeholder = Nokogiri::XML::Node.new("dummy", document)
    add_previous_sibling_node(placeholder)
    unlink
    return placeholder.replace(node_or_tags)
  end

  # The replacement is coerced by the parent, not by this node.
  replacement = parent.coerce(node_or_tags)

  unless replacement.is_a?(XML::NodeSet)
    # Single node: replace in place.
    replace_node(replacement)
    return replacement
  end

  # Several nodes: insert each one before this node, then remove this node.
  replacement.each { |incoming| add_previous_sibling(incoming) }
  unlink
  replacement
end
####
# Swap this Node for +node_or_tags+
#
# +node_or_tags+ can be a Nokogiri::XML::Node, a ::DocumentFragment, a ::NodeSet, or a String
# Containing markup.
#
# Returns self, to support chaining of calls.
#
# Also see related method +replace+.
def swap(node_or_tags)
# Same operation as #replace; only the return value differs.
replace(node_or_tags)
# Return the receiver (the node that was swapped out) so calls can be chained.
self
end
####
# Set the Node's content to a Text node containing +string+. The string gets XML escaped, not
# interpreted as markup.
def content=(string)
# The argument is stringified with #to_s, passed through encode_special_chars, and the
# result is handed to the native_content= writer.
self.native_content = encode_special_chars(string.to_s)
end
###
# Set the parent Node for this Node
def parent=(parent_node)
# Reparenting is expressed as "append this node to the new parent".
parent_node.add_child(self)
end
###
# Adds a default namespace supplied as a string +url+ href, to self.
# The consequence is as if an xmlns attribute with the supplied argument were
# present in the parsed XML. A default namespace set with this method will
# not show up in #attributes, but when this node is serialized to XML an
# "xmlns" attribute will appear. See also #namespace and #namespace=
def default_namespace=(url)
# The namespace definition is added with a nil prefix and +url+ as the href.
add_namespace_definition(nil, url)
end
###
# Set the default namespace on this node (as would be defined with an
# "xmlns=" attribute in XML source), as a Namespace object +ns+. Note that
# a Namespace added this way will NOT be serialized as an xmlns attribute
# for this node. You probably want #default_namespace= instead, or perhaps
# #add_namespace_definition with a nil prefix argument.
def namespace=(ns)
  # A nil/false argument skips validation and is passed straight to set_namespace.
  if ns
    # Anything else must be a Namespace ...
    unless Nokogiri::XML::Namespace === ns
      raise TypeError, "#{ns.class} can't be coerced into Nokogiri::XML::Namespace"
    end

    # ... that belongs to the same document as this node.
    raise ArgumentError, "namespace must be declared on the same document" if ns.document != document
  end

  set_namespace(ns)
end
###
# Do xinclude substitution on the subtree below node. If given a block, a
# Nokogiri::XML::ParseOptions object initialized from +options+, will be
# passed to it, allowing more convenient modification of the parser options.
def do_xinclude(options = XML::ParseOptions::DEFAULT_XML)
  # An Integer bitmask is wrapped in a ParseOptions object; anything else is used as given.
  parse_options =
    case options
    when Integer then Nokogiri::XML::ParseOptions.new(options)
    else options
    end

  # Let the caller adjust the options before they are applied.
  yield(parse_options) if block_given?

  # call c extension
  process_xincludes(parse_options.to_i)
end
# Sibling navigation and sibling insertion shorthands.
alias_method :next, :next_sibling
alias_method :previous, :previous_sibling
alias_method :next=, :add_next_sibling
alias_method :previous=, :add_previous_sibling
# Alternate names for unlinking, renaming, and namespace definition.
alias_method :remove, :unlink
alias_method :name=, :node_name=
alias_method :add_namespace, :add_namespace_definition
# :section:
# Alternate names that all resolve to #content.
alias_method :inner_text, :content
alias_method :text, :content
alias_method :to_str, :content
# Alternate names for node name/type, duplication, and element children.
alias_method :name, :node_name
alias_method :type, :node_type
alias_method :clone, :dup
alias_method :elements, :element_children
# :section: Working With Node Attributes
# :call-seq: [](name) → (String, nil)
#
# Fetch an attribute from this node.
#
# ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
# namespaced attributes, use #attribute_with_ns.
#
# [Returns] (String, nil) value of the attribute +name+, or +nil+ if no matching attribute exists
#
# *Example*
#
#   doc = Nokogiri::XML("<root><child size='large' class='big wide tall'/></root>")
#   child = doc.at_css("child")
#   child["size"] # => "large"
#   child["class"] # => "big wide tall"
#
# *Example:* Namespaced attributes will not be returned.
#
# ⚠ Note namespaced attributes may be accessed with #attribute or #attribute_with_ns
#
# doc = Nokogiri::XML(<<~EOF)
#
#
#
# EOF
# doc.at_css("child")["size"] # => nil
# doc.at_css("child").attribute("size").value # => "broad"
# doc.at_css("child").attribute_with_ns("size", "http://example.com/widths").value
# # => "broad"
#
def [](name)
# The name is stringified with #to_s before the lookup is delegated to +get+.
get(name.to_s)
end
# :call-seq: []=(name, value) → value
#
# Update the attribute +name+ to +value+, or create the attribute if it does not exist.
#
# ⚠ Note that attributes with namespaces cannot be accessed with this method. To access
# namespaced attributes for update, use #attribute_with_ns. To add a namespaced attribute,
# see the example below.
#
# [Returns] +value+
#
# *Example*
#
# doc = Nokogiri::XML("")
# child = doc.at_css("child")
# child["size"] = "broad"
# child.to_html
# # => ""
#
# *Example:* Add a namespaced attribute.
#
# doc = Nokogiri::XML(<<~EOF)
#
#
#
# EOF
# child = doc.at_css("child")
# child["size"] = "broad"
# ns = doc.root.namespace_definitions.find { |ns| ns.prefix == "width" }
# child.attribute("size").namespace = ns
# doc.to_html
# # => "\n" +
# # " \n" +
# # "\n"
#
def []=(name, value)
# Both the name and the value are stringified with #to_s before being handed to +set+.
set(name.to_s, value.to_s)
end
#
# :call-seq: attributes() → Hash
#
# Fetch this node's attributes.
#
# ⚠ Because the keys do not include any namespace information for the attribute, in case of a
# simple name collision, not all attributes will be returned. In this case, you will need to
# use #attribute_nodes.
#
# [Returns]
# Hash containing attributes belonging to +self+. The hash keys are String attribute
# names (without the namespace), and the hash values are Nokogiri::XML::Attr.
#
# *Example* with no namespaces:
#
# doc = Nokogiri::XML("")
# doc.at_css("child").attributes
# # => {"size"=>#(Attr:0x550 { name = "size", value = "large" }),
# # "class"=>#(Attr:0x564 { name = "class", value = "big wide tall" })}
#
# *Example* with a namespace:
#
# doc = Nokogiri::XML("")
# doc.at_css("child").attributes
# # => {"size"=>
# # #(Attr:0x550 {
# # name = "size",
# # namespace = #(Namespace:0x564 {
# # prefix = "desc",
# # href = "http://example.com/sizes"
# # }),
# # value = "large"
# # })}
#
# *Example* with an attribute name collision:
#
# ⚠ Note that only one of the attributes is returned in the Hash.
#
# doc = Nokogiri::XML(<<~EOF)
#
#
#
# EOF
# doc.at_css("child").attributes
# # => {"size"=>
# # #(Attr:0x550 {
# # name = "size",
# # namespace = #(Namespace:0x564 {
# # prefix = "height",
# # href = "http://example.com/heights"
# # }),
# # value = "tall"
# # })}
#
def attributes
  by_name = {}
  # Later attributes overwrite earlier ones that share the same node_name, which is
  # why a name collision leaves only one entry in the Hash.
  attribute_nodes.each { |attr_node| by_name[attr_node.node_name] = attr_node }
  by_name
end
###
# Get the attribute values for this Node.
def values
  # One entry per attribute node, in attribute_nodes order.
  attribute_nodes.map { |attr_node| attr_node.value }
end
###
# Does this Node's attributes include +value+?
def value?(value)
# Membership test against the Array returned by #values.
values.include?(value)
end
###
# Get the attribute names for this Node.
def keys
  # One entry per attribute node, in attribute_nodes order.
  attribute_nodes.map { |attr_node| attr_node.node_name }
end
###
# Iterate over each attribute name and value pair for this Node.
def each
  # Follow the usual Ruby contract for #each: without a block, hand back an Enumerator
  # over the same [name, value] pairs instead of failing with LocalJumpError at the
  # first attribute.
  return enum_for(:each) unless block_given?

  # With a block, yield one two-element Array per attribute node. The return value is
  # whatever attribute_nodes.each returns.
  attribute_nodes.each do |node|
    yield [node.node_name, node.value]
  end
end
###
# Remove the attribute named +name+
def remove_attribute(name)
  # Only look the attribute up (and remove it) when the node actually has it;
  # otherwise the result is nil.
  removed = key?(name) ? attributes[name].remove : nil

  # The XPath context is cleared on JRuby only.
  clear_xpath_context if Nokogiri.jruby?

  removed
end
#
# :call-seq: classes() → Array
#
# Fetch CSS class names of a Node.
#
# This is a convenience function and is equivalent to:
#
# node.kwattr_values("class")
#
# See related: #kwattr_values, #add_class, #append_class, #remove_class
#
# [Returns]
# The CSS classes (Array of String) present in the Node's "class" attribute. If the
# attribute is empty or non-existent, the return value is an empty array.
#
# *Example*
#
#   node         # => <div class="section title header"></div>
#   node.classes # => ["section", "title", "header"]
#
def classes
# "class" is treated as a keyword attribute; the lookup is delegated to #kwattr_values.
kwattr_values("class")
end
#
# :call-seq: add_class(names) → self
#
# Ensure HTML CSS classes are present on +self+. Any CSS classes in +names+ that already exist
# in the "class" attribute are _not_ added. Note that any existing duplicates in the
# "class" attribute are not removed. Compare with #append_class.
#
# This is a convenience function and is equivalent to:
#
# node.kwattr_add("class", names)
#
# See related: #kwattr_add, #classes, #append_class, #remove_class
#
# [Parameters]
# - +names+ (String, Array)
#
# CSS class names to be added to the Node's "class" attribute. May be a string containing
# whitespace-delimited names, or an Array of String names. Any class names already present
# will not be added. Any class names not present will be added. If no "class" attribute
# exists, one is created.
#
# [Returns] +self+ (Node) for ease of chaining method calls.
#
# *Example:* Ensure that the node has CSS class "section"
#
# node # =>
# node.add_class("section") # =>
# node.add_class("section") # => # duplicate not added
#
# *Example:* Ensure that the node has CSS classes "section" and "header", via a String argument
#
# Note that the CSS class "section" is not added because it is already present.
# Note also that the pre-existing duplicate CSS class "section" is not removed.
#
# node # =>
# node.add_class("section header") # =>
#
# *Example:* Ensure that the node has CSS classes "section" and "header", via an Array argument
#
# node # =>
# node.add_class(["section", "header"]) # =>
#
def add_class(names)
# Delegates to #kwattr_add for the "class" attribute and returns its result.
kwattr_add("class", names)
end
#
# :call-seq: append_class(names) → self
#
# Add HTML CSS classes to +self+, regardless of duplication. Compare with #add_class.
#
# This is a convenience function and is equivalent to:
#
# node.kwattr_append("class", names)
#
# See related: #kwattr_append, #classes, #add_class, #remove_class
#
# [Parameters]
# - +names+ (String, Array)
#
# CSS class names to be appended to the Node's "class" attribute. May be a string containing
# whitespace-delimited names, or an Array of String names. All class names passed in will be
# appended to the "class" attribute even if they are already present in the attribute
# value. If no "class" attribute exists, one is created.
#
# [Returns] +self+ (Node) for ease of chaining method calls.
#
# *Example:* Append "section" to the node's CSS "class" attribute
#
# node # =>
# node.append_class("section") # =>
# node.append_class("section") # => # duplicate added!
#
# *Example:* Append "section" and "header" to the node's CSS "class" attribute, via a String argument
#
# Note that the CSS class "section" is appended even though it is already present
#
# node # =>
# node.append_class("section header") # =>
#
# *Example:* Append "section" and "header" to the node's CSS "class" attribute, via an Array argument
#
# node # =>
# node.append_class(["section", "header"]) # =>
# node.append_class(["section", "header"]) # =>
#
def append_class(names)
# Delegates to #kwattr_append for the "class" attribute and returns its result.
kwattr_append("class", names)
end
# :call-seq:
# remove_class(css_classes) → self
#
# Remove HTML CSS classes from this node. Any CSS class names in +css_classes+ that exist in
# this node's "class" attribute are removed, including any multiple entries.
#
# If no CSS classes remain after this operation, or if +css_classes+ is +nil+, the "class"
# attribute is deleted from the node.
#
# This is a convenience function and is equivalent to:
#
# node.kwattr_remove("class", css_classes)
#
# Also see #kwattr_remove, #classes, #add_class, #append_class
#
# [Parameters]
# - +css_classes+ (String, Array)
#
# CSS class names to be removed from the Node's
# "class" attribute. May be a string containing whitespace-delimited names, or an Array of
# String names. Any class names already present will be removed. If no CSS classes remain,
# the "class" attribute is deleted.
#
# [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
#
# *Example*: Deleting a CSS class
#
# Note that all instances of the class "section" are removed from the "class" attribute.
#
# node # =>
# node.remove_class("section") # =>
#
# *Example*: Deleting the only remaining CSS class
#
# Note that the attribute is removed once there are no remaining classes.
#
# node # =>
# node.remove_class("section") # =>
#
# *Example*: Deleting multiple CSS classes
#
# Note that the "class" attribute is deleted once it's empty.
#
# node # =>
# node.remove_class(["section", "float"]) # =>
#
def remove_class(names = nil)
# Delegates to #kwattr_remove for the "class" attribute; +names+ defaults to nil, which
# #kwattr_remove treats as "delete the whole attribute".
kwattr_remove("class", names)
end
# :call-seq:
# kwattr_values(attribute_name) → Array
#
# Fetch values from a keyword attribute of a Node.
#
# A "keyword attribute" is a node attribute that contains a set of space-delimited
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
# contain CSS classes. But other keyword attributes exist, for instance
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
#
# See also #classes, #kwattr_add, #kwattr_append, #kwattr_remove
#
# [Parameters]
# - +attribute_name+ (String) The name of the keyword attribute to be inspected.
#
# [Returns]
# (Array) The values present in the Node's +attribute_name+ attribute. If the
# attribute is empty or non-existent, the return value is an empty array.
#
# *Example:*
#
#   node                      # => <a rel="nofollow noopener external">link</a>
#   node.kwattr_values("rel") # => ["nofollow", "noopener", "external"]
#
# Since v1.11.0
def kwattr_values(attribute_name)
# A missing attribute (nil from get_attribute) is replaced with an empty Array before
# the value is handed to keywordify.
keywordify(get_attribute(attribute_name) || [])
end
# :call-seq:
# kwattr_add(attribute_name, keywords) → self
#
# Ensure that values are present in a keyword attribute.
#
# Any values in +keywords+ that already exist in the Node's attribute values are _not_
# added. Note that any existing duplicates in the attribute values are not removed. Compare
# with #kwattr_append.
#
# A "keyword attribute" is a node attribute that contains a set of space-delimited
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
# contain CSS classes. But other keyword attributes exist, for instance
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
#
# See also #add_class, #kwattr_values, #kwattr_append, #kwattr_remove
#
# [Parameters]
# - +attribute_name+ (String) The name of the keyword attribute to be modified.
# - +keywords+ (String, Array)
# Keywords to be added to the attribute named +attribute_name+. May be a string containing
# whitespace-delimited values, or an Array of String values. Any values already present will
# not be added. Any values not present will be added. If the named attribute does not exist,
# it is created.
#
# [Returns] +self+ (Nokogiri::XML::Node) for ease of chaining method calls.
#
# *Example:* Ensure that a +Node+ has "nofollow" in its +rel+ attribute.
#
# Note that duplicates are not added.
#
# node # =>
# node.kwattr_add("rel", "nofollow") # =>
# node.kwattr_add("rel", "nofollow") # =>
#
# *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via a
# String argument.
#
# Note that "nofollow" is not added because it is already present. Note also that the
# pre-existing duplicate "nofollow" is not removed.
#
# node # =>
# node.kwattr_add("rel", "nofollow noreferrer") # =>
#
# *Example:* Ensure that a +Node+ has "nofollow" and "noreferrer" in its +rel+ attribute, via
# an Array argument.
#
# node # =>
# node.kwattr_add("rel", ["nofollow", "noreferrer"]) # =>
#
# Since v1.11.0
def kwattr_add(attribute_name, keywords)
  requested = keywordify(keywords)
  existing = kwattr_values(attribute_name)

  # Keep the current values untouched (duplicates included) and append only the
  # requested keywords that are not present yet.
  missing = requested - existing
  set_attribute(attribute_name, (existing + missing).join(" "))

  self
end
# :call-seq:
# kwattr_append(attribute_name, keywords) → self
#
# Add keywords to a Node's keyword attribute, regardless of duplication. Compare with
# #kwattr_add.
#
# A "keyword attribute" is a node attribute that contains a set of space-delimited
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
# contain CSS classes. But other keyword attributes exist, for instance
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
#
# See also #append_class, #kwattr_values, #kwattr_add, #kwattr_remove
#
# [Parameters]
# - +attribute_name+ (String) The name of the keyword attribute to be modified.
# - +keywords+ (String, Array)
# Keywords to be added to the attribute named +attribute_name+. May be a string containing
# whitespace-delimited values, or an Array of String values. All values passed in will be
# appended to the named attribute even if they are already present in the attribute. If the
# named attribute does not exist, it is created.
#
# [Returns] +self+ (Node) for ease of chaining method calls.
#
# *Example:* Append "nofollow" to the +rel+ attribute.
#
# Note that duplicates are added.
#
# node # =>
# node.kwattr_append("rel", "nofollow") # =>
# node.kwattr_append("rel", "nofollow") # =>
#
# *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via a String argument.
#
# Note that "nofollow" is appended even though it is already present.
#
# node # =>
# node.kwattr_append("rel", "nofollow noreferrer") # =>
#
#
# *Example:* Append "nofollow" and "noreferrer" to the +rel+ attribute, via an Array argument.
#
# node # =>
# node.kwattr_append("rel", ["nofollow", "noreferrer"]) # =>
#
# Since v1.11.0
def kwattr_append(attribute_name, keywords)
  appended = keywordify(keywords)
  existing = kwattr_values(attribute_name)

  # No de-duplication here: every requested keyword is appended after the current ones.
  combined = existing + appended
  set_attribute(attribute_name, combined.join(" "))

  self
end
# :call-seq:
# kwattr_remove(attribute_name, keywords) → self
#
# Remove keywords from a keyword attribute. Any matching keywords that exist in the named
# attribute are removed, including any multiple entries.
#
# If no keywords remain after this operation, or if +keywords+ is +nil+, the attribute is
# deleted from the node.
#
# A "keyword attribute" is a node attribute that contains a set of space-delimited
# values. Perhaps the most familiar example of this is the HTML "class" attribute used to
# contain CSS classes. But other keyword attributes exist, for instance
# {the "rel" attribute}[https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel].
#
# See also #remove_class, #kwattr_values, #kwattr_add, #kwattr_append
#
# [Parameters]
# - +attribute_name+ (String) The name of the keyword attribute to be modified.
# - +keywords+ (String, Array)
# Keywords to be removed from the attribute named +attribute_name+. May be a string
# containing whitespace-delimited values, or an Array of String values. Any keywords present
# in the named attribute will be removed. If no keywords remain, or if +keywords+ is nil,
# the attribute is deleted.
#
# [Returns] +self+ (Node) for ease of chaining method calls.
#
# *Example:*
#
# Note that the +rel+ attribute is deleted when empty.
#
#   node                                    # => <a rel="nofollow noreferrer">link</a>
#   node.kwattr_remove("rel", "nofollow")   # => <a rel="noreferrer">link</a>
#   node.kwattr_remove("rel", "noreferrer") # => <a>link</a>
#
# Since v1.11.0
def kwattr_remove(attribute_name, keywords)
  unless keywords.nil?
    unwanted = keywordify(keywords)
    remaining = kwattr_values(attribute_name) - unwanted

    # Some keywords survive: write them back and stop here.
    unless remaining.empty?
      set_attribute(attribute_name, remaining.join(" "))
      return self
    end
  end

  # Reached when +keywords+ is nil or when nothing is left: drop the attribute entirely.
  remove_attribute(attribute_name)
  self
end
# Alternate names for the attribute accessors defined above.
alias_method :delete, :remove_attribute
alias_method :get_attribute, :[]
alias_method :attr, :[]
alias_method :set_attribute, :[]=
alias_method :has_attribute?, :key?
# :section:
###
# Returns true if this Node matches +selector+
def matches?(selector)
# The search is run from the last entry of #ancestors, and the node matches when the
# result set contains it.
# NOTE(review): if #ancestors is empty, +ancestors.last+ would presumably be nil and this
# line would raise NoMethodError — confirm the intended behavior for such nodes.
ancestors.last.search(selector).include?(self)
end
###
# Create a DocumentFragment containing +tags+ that is relative to _this_
# context node.
def fragment(tags)
# The document resolves which DocumentFragment class to use; that class is then
# instantiated with the document, the markup, and this node as the third argument.
document.related_class("DocumentFragment").new(document, tags, self)
end
###
# Parse +string_or_io+ as a document fragment within the context of
# *this* node. Returns a XML::NodeSet containing the nodes parsed from
# +string_or_io+.
#
# +options+ may be omitted (a default is picked from the document type), given as an
# Integer (wrapped in a ParseOptions), or given as a ParseOptions. If a block is
# given, the options object is yielded to it before parsing.
#
# If parsing records new errors on the document and the +recover+ option is off, the
# first new error is raised.
def parse(string_or_io, options = nil)
  ##
  # When the current node is unparented and not an element node, use the
  # document as the parsing context instead. Otherwise, the in-context
  # parser cannot find an element or a document node.
  # Document Fragments are also not usable by the in-context parser.
  unless element? || document? || (parent && !parent.fragment?)
    return document.parse(string_or_io, options)
  end

  options ||= if document.html?
    ParseOptions::DEFAULT_HTML
  else
    ParseOptions::DEFAULT_XML
  end
  options = Nokogiri::XML::ParseOptions.new(options) if options.is_a?(Integer)
  yield options if block_given?

  contents = string_or_io.respond_to?(:read) ? string_or_io.read : string_or_io
  return Nokogiri::XML::NodeSet.new(document) if contents.empty?

  # Remember how many errors the document already had so new ones can be spotted.
  errors_before = document.errors.length
  parsed = in_context(contents, options.to_i)

  return parsed unless document.errors.length > errors_before
  raise document.errors[errors_before] unless options.recover?
  return parsed unless parsed.empty?

  # libxml2 < 2.13 does not obey the +recover+ option after encountering errors during
  # +in_context+ parsing, and so this horrible hack is here to try to emulate recovery
  # behavior.
  #
  # (Note that HTML4 fragment parsing seems to have been fixed in abd74186, and XML
  # fragment parsing is fixed in 1c106edf. Both are in 2.13.)
  #
  # Unfortunately, this means we're no longer parsing "in context" and so namespaces that
  # would have been inherited from the context node won't be handled correctly. This hack
  # was written in 2010, and I regret it, because it's silently degrading functionality in
  # a way that's not easily prevented (or even detected).
  #
  # I think preferable behavior would be to either:
  #
  # a. add an error noting that we "fell back" and pointing the user to turning off the
  #    +recover+ option
  # b. don't recover, but raise a sensible exception
  #
  # For context and background:
  # - https://github.com/sparklemotion/nokogiri/issues/313
  # - https://github.com/sparklemotion/nokogiri/issues/2092
  document.related_class("DocumentFragment").parse(contents).children
end
# :call-seq:
#   namespaces() → Hash
#
# Fetch all the namespaces on this node and its ancestors.
#
# Note that the keys in this hash are the XML attributes that would be used to define each
# namespace, such as "xmlns:prefix", not just the prefix.
#
# The default namespace for this node will be included with key "xmlns".
#
# See also #namespace_scopes
#
# [Returns]
#   Hash containing all the namespaces on this node and its ancestors. The hash keys are the
#   defining attribute names ("xmlns" or "xmlns:prefix"), and the hash value for each key is
#   the namespace URI.
#
# *Example:*
#
#   doc = Nokogiri::XML(<<~EOF)
#     <root xmlns="http://example.com/root" xmlns:in_scope="http://example.com/in_scope">
#       <first/>
#       <second xmlns="http://example.com/child"/>
#       <third xmlns:foo="http://example.com/foo"/>
#     </root>
#   EOF
#   doc.at_xpath("//root:first", "root" => "http://example.com/root").namespaces
#   # => {"xmlns"=>"http://example.com/root",
#   #     "xmlns:in_scope"=>"http://example.com/in_scope"}
#   doc.at_xpath("//child:second", "child" => "http://example.com/child").namespaces
#   # => {"xmlns"=>"http://example.com/child",
#   #     "xmlns:in_scope"=>"http://example.com/in_scope"}
#   doc.at_xpath("//root:third", "root" => "http://example.com/root").namespaces
#   # => {"xmlns:foo"=>"http://example.com/foo",
#   #     "xmlns"=>"http://example.com/root",
#   #     "xmlns:in_scope"=>"http://example.com/in_scope"}
#
def namespaces
  namespace_scopes.to_h do |scope|
    prefix = scope.prefix
    attribute_name = prefix ? "xmlns:#{prefix}" : "xmlns"
    [attribute_name, scope.href]
  end
end
# Returns true if this node's type is COMMENT_NODE, i.e. it is a Comment
def comment?
  COMMENT_NODE == type
end
# Returns true if this node's type is CDATA_SECTION_NODE, i.e. it is a CDATA
def cdata?
  CDATA_SECTION_NODE == type
end
# Returns true if this node's type is DOCUMENT_NODE, i.e. it is an XML::Document node
def xml?
  DOCUMENT_NODE == type
end
# Returns true if this node's type is HTML_DOCUMENT_NODE, i.e. it is an
# HTML4::Document or HTML5::Document node
def html?
  HTML_DOCUMENT_NODE == type
end
# Returns true if this is a Document (an instance of XML::Document or a subclass)
def document?
  XML::Document === self
end
# Returns true if this node's type is PI_NODE, i.e. it is a ProcessingInstruction node
def processing_instruction?
  PI_NODE == type
end
# Returns true if this node's type is TEXT_NODE, i.e. it is a Text node
def text?
  TEXT_NODE == type
end
# Returns true if this node's type is DOCUMENT_FRAG_NODE, i.e. it is a DocumentFragment
def fragment?
  DOCUMENT_FRAG_NODE == type
end
###
# Fetch the Nokogiri::HTML4::ElementDescription for this node, looked up by
# the node's name. Returns nil on XML documents and on unknown tags.
def description
  Nokogiri::HTML4::ElementDescription[name] unless document.xml?
end
###
# Is this a read only node?
#
# According to gdome2, notation, entity and entity-declaration nodes are the
# read-only node types.
def read_only?
  case type
  when NOTATION_NODE, ENTITY_NODE, ENTITY_DECL
    true
  else
    false
  end
end
# Returns true if this node's type is ELEMENT_NODE, i.e. it is an Element node
def element?
  ELEMENT_NODE == type
end
# #elem? is a shorthand alias for #element?
alias_method :elem?, :element?
###
# Turn this node in to a string. If the document is HTML, this method
# returns html. If the document is XML, this method returns XML.
def to_s
document.xml? ? to_xml : to_html
end
# Get the inner_html for this node's Node#children.
#
# Every child is serialized with #to_html (receiving +args+ unchanged) and the
# pieces are concatenated in child order.
def inner_html(*args)
  pieces = children.map do |child|
    child.to_html(*args)
  end
  pieces.join
end
# Get the path to this node as a CSS expression.
#
# The XPath returned by #path is split on "/", empty segments are dropped, each
# positional predicate such as "[2]" becomes ":nth-of-type(2)", and the steps are
# joined with the child combinator " > ".
def css_path
  steps = path.split("/").reject(&:empty?)
  steps.map { |step| step.gsub(/\[(\d+)\]/, ':nth-of-type(\1)') }.join(" > ")
end
###
# Get a list of ancestor Node for this Node. If +selector+ is given,
# the ancestors must match +selector+
#
# Ancestors are listed nearest-first. When a +selector+ is given it is evaluated
# from the outermost ancestor, and only ancestors present in that result are kept.
def ancestors(selector = nil)
  lineage = []
  cursor = self
  loop do
    # Stop at anything that cannot report a parent, or whose parent is missing.
    break unless cursor.respond_to?(:parent)

    cursor = cursor.parent
    break unless cursor

    lineage << cursor
  end

  return NodeSet.new(document) if lineage.empty?
  return NodeSet.new(document, lineage) unless selector

  selected = lineage.last.search(selector)
  NodeSet.new(document, lineage.select { |ancestor| selected.include?(ancestor) })
end
####
# Yields self and all children to +block+ recursively.
#
# Each child's subtree is traversed first; this node itself is yielded last.
def traverse(&block)
  children.each do |child|
    child.traverse(&block)
  end
  yield self
end
###
# Accept a visitor. This method calls "visit" on +visitor+ with self,
# and returns whatever +visitor.visit+ returns.
def accept(visitor)
visitor.visit(self)
end
###
# Test to see if this Node is equal to +other+
#
# Two nodes are equal when their +pointer_id+ values match. Anything falsy, or
# anything that does not respond to +pointer_id+, is never equal.
def ==(other)
  if other && other.respond_to?(:pointer_id)
    pointer_id == other.pointer_id
  else
    false
  end
end
###
# Compare two Node objects with respect to their Document. Nodes from
# different documents cannot be compared.
#
# Returns nil when +other+ is not a Nokogiri::XML::Node or belongs to a different
# document; otherwise returns the result of +compare+.
def <=>(other)
  comparable = other.is_a?(Nokogiri::XML::Node) && document == other.document
  compare(other) if comparable
end
# :section: Serialization and Generating Output
###
# Serialize Node using +options+. Save options can also be set using a block.
#
# See also Nokogiri::XML::Node::SaveOptions and Node@Serialization+and+Generating+Output.
#
# These two statements are equivalent:
#
#   node.serialize(encoding: 'UTF-8', save_with: FORMAT | AS_XML)
#
# or
#
#   node.serialize(encoding: 'UTF-8') do |config|
#     config.format.as_xml
#   end
#
# When +:encoding+ is not given, the document's encoding is used, falling back to UTF-8.
# The options Hash passed by the caller is never modified.
#
# [Returns] (String) the serialized output, in the chosen encoding.
#
def serialize(*args, &block)
  # TODO: deprecate non-hash options, see 46c68ed 2009-06-20 for context
  options = if args.first.is_a?(Hash)
    # Work on a copy: defaults filled in below must not leak into the caller's Hash,
    # which may be frozen or reused for a node from a differently-encoded document.
    args.first.dup
  else
    {
      encoding: args[0],
      save_with: args[1],
    }
  end

  options[:encoding] ||= document.encoding
  encoding = Encoding.find(options[:encoding] || "UTF-8")

  io = StringIO.new(String.new(encoding: encoding))
  write_to(io, options, &block)
  io.string
end
###
# Serialize this Node to HTML
#
# doc.to_html
#
# See Node#write_to for a list of +options+. For formatted output,
# use Node#to_xhtml instead.
#
# Delegates to the private +to_format+ helper, passing SaveOptions::DEFAULT_HTML as
# the save option to use when +options+ has no +:save_with+ entry.
def to_html(options = {})
to_format(SaveOptions::DEFAULT_HTML, options)
end
###
# Serialize this Node to XML using +options+
#
#   doc.to_xml(indent: 5, encoding: 'UTF-8')
#
# See Node#write_to for a list of +options+
#
# +:save_with+ defaults to SaveOptions::DEFAULT_XML. The +options+ Hash passed by the
# caller is not modified, so it can be frozen or reused for other serializers.
def to_xml(options = {})
  save_with = options[:save_with] || SaveOptions::DEFAULT_XML
  serialize(options.merge(save_with: save_with))
end
###
# Serialize this Node to XHTML using +options+
#
# doc.to_xhtml(indent: 5, encoding: 'UTF-8')
#
# See Node#write_to for a list of +options+
#
# Delegates to the private +to_format+ helper, passing SaveOptions::DEFAULT_XHTML as
# the save option to use when +options+ has no +:save_with+ entry.
def to_xhtml(options = {})
to_format(SaveOptions::DEFAULT_XHTML, options)
end
###
# :call-seq:
# write_to(io, *options)
#
# Serialize this node or document to +io+.
#
# [Parameters]
# - +io+ (IO) An IO-like object to which the serialized content will be written.
# - +options+ (Hash) See below
#
# [Options]
# * +:encoding+ (String or Encoding) specify the encoding of the output (defaults to document encoding)
# * +:indent_text+ (String) the indentation text (defaults to <tt>" "</tt>)
# * +:indent+ (Integer) the number of +:indent_text+ to use (defaults to +2+)
# * +:save_with+ (Integer) a combination of SaveOptions constants
#
# To save with UTF-8 indented twice:
#
# node.write_to(io, encoding: 'UTF-8', indent: 2)
#
# To save indented with two dashes:
#
# node.write_to(io, indent_text: '-', indent: 2)
#
def write_to(io, *options)
  # Only a leading Hash is honoured; anything else is replaced by an empty Hash.
  leading = options.first
  options = leading.is_a?(Hash) ? leading : {}

  # NOTE(review): the [0] / [1] lookups below are Hash lookups on the Integer keys 0 and 1,
  # not positional arguments - they look like leftovers from a positional API; confirm
  # before removing.
  encoding = options[:encoding] || options[0] || document.encoding

  on_jruby = Nokogiri.jruby?
  save_options = options[:save_with] || options[1]
  # Formatting and two-step indentation are the defaults everywhere except JRuby.
  save_options ||= SaveOptions::FORMAT unless on_jruby
  indent_times = options[:indent] || (on_jruby ? 0 : 2)
  indent_text = options[:indent_text] || " "

  # Any string times 0 returns an empty string. Therefore, use the same
  # string instead of generating a new empty string for every node with
  # zero indentation.
  indentation = indent_times.zero? ? "" : indent_text * indent_times

  config = SaveOptions.new(save_options.to_i)
  yield config if block_given?

  # The native writer is handed an encoding name rather than an Encoding object.
  encoding = encoding.name if encoding.is_a?(Encoding)
  native_write_to(io, encoding, indentation, config.options)
end
###
# Write Node as HTML to +io+ with +options+
#
# See Node#write_to for a list of +options+
#
# Delegates to the private +write_format_to+ helper, passing SaveOptions::DEFAULT_HTML
# as the save option to use when +options+ has no +:save_with+ entry.
def write_html_to(io, options = {})
write_format_to(SaveOptions::DEFAULT_HTML, io, options)
end
###
# Write Node as XHTML to +io+ with +options+
#
# See Node#write_to for a list of +options+
#
# Delegates to the private +write_format_to+ helper, passing SaveOptions::DEFAULT_XHTML
# as the save option to use when +options+ has no +:save_with+ entry.
def write_xhtml_to(io, options = {})
write_format_to(SaveOptions::DEFAULT_XHTML, io, options)
end
###
# Write Node as XML to +io+ with +options+
#
#   doc.write_xml_to io, :encoding => 'UTF-8'
#
# See Node#write_to for a list of options
#
# +:save_with+ defaults to SaveOptions::DEFAULT_XML. The +options+ Hash passed by the
# caller is not modified, so it can be frozen or reused for other writers.
def write_xml_to(io, options = {})
  save_with = options[:save_with] || SaveOptions::DEFAULT_XML
  write_to(io, options.merge(save_with: save_with))
end
###
# Canonicalize the subtree rooted at this node.
#
# The work is delegated to the document's +canonicalize+, with a filter block that
# accepts a node when it is this node or has this node among its ancestors. When the
# yielded +node+ is not an XML::Node, the yielded +parent+ is tested in its place.
#
# +mode+, +inclusive_namespaces+ and +with_comments+ are passed through unchanged.
def canonicalize(mode = XML::XML_C14N_1_0, inclusive_namespaces = nil, with_comments = false)
  subtree_root = self
  document.canonicalize(mode, inclusive_namespaces, with_comments) do |node, parent|
    candidate = if node.is_a?(XML::Node)
      node
    else
      parent
    end
    candidate == subtree_root || candidate.ancestors.include?(subtree_root)
  end
end
# Keys that #deconstruct_keys is willing to report, in the order they are emitted.
DECONSTRUCT_KEYS = %i[name attributes children namespace content elements inner_html].freeze # :nodoc:
# Keys whose value comes from a method with a different name than the key itself.
DECONSTRUCT_METHODS = { attributes: :attribute_nodes }.freeze # :nodoc:
#
# :call-seq: deconstruct_keys(array_of_names) → Hash
#
# Returns a hash describing the Node, to use in pattern matching.
#
# +array_of_names+ is the list of keys the pattern needs. Names that are not valid keys
# are ignored. Ruby passes +nil+ when the pattern captures the remainder with
# <tt>**rest</tt>; in that case every valid key is returned.
#
# Valid keys and their values:
# - +name+ → (String) The name of this node, or "text" if it is a Text node.
# - +namespace+ → (Namespace, nil) The namespace of this node, or nil if there is no namespace.
# - +attributes+ → (Array) The attributes of this node.
# - +children+ → (Array) The children of this node. 💡 Note this includes text nodes.
# - +elements+ → (Array) The child elements of this node. 💡 Note this does not include text nodes.
# - +content+ → (String) The contents of all the text nodes in this node's subtree. See #content.
# - +inner_html+ → (String) The inner markup for the children of this node. See #inner_html.
#
# *Example*
#
#   doc = Nokogiri::XML.parse(<<~XML)
#     <?xml version="1.0"?>
#     <parent xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
#       <child1 foo="abc" noko:bar="def">First</child1>
#       <noko:child2 foo="qwe" noko:bar="rty">Second</noko:child2>
#     </parent>
#   XML
#
#   doc.root.deconstruct_keys([:name, :namespace])
#   # => {:name=>"parent",
#   #     :namespace=>
#   #      #(Namespace:0x35c { href = "http://nokogiri.org/ns/default" })}
#
#   doc.root.deconstruct_keys([:inner_html, :content])
#   # => {:content=>"\n" + "  First\n" + "  Second\n",
#   #     :inner_html=>
#   #      "\n" +
#   #      "  <child1 foo=\"abc\" noko:bar=\"def\">First</child1>\n" +
#   #      "  <noko:child2 foo=\"qwe\" noko:bar=\"rty\">Second</noko:child2>\n"}
#
#   doc.root.elements.first.deconstruct_keys([:attributes])
#   # => {:attributes=>
#   #      [#(Attr:0x370 { name = "foo", value = "abc" }),
#   #       #(Attr:0x384 {
#   #         name = "bar",
#   #         namespace = #(Namespace:0x398 {
#   #           prefix = "noko",
#   #           href = "http://nokogiri.org/ns/noko"
#   #           }),
#   #         value = "def"
#   #         })]}
#
# Since v1.14.0
#
def deconstruct_keys(keys)
  # A nil +keys+ means "everything"; intersecting with nil would raise a TypeError.
  requested_keys = keys.nil? ? DECONSTRUCT_KEYS : DECONSTRUCT_KEYS & keys

  requested_keys.each_with_object({}) do |key, values|
    # Most keys are read through the method of the same name; exceptions are remapped.
    reader = DECONSTRUCT_METHODS[key] || key
    values[key] = send(reader)
  end
end
# :section:
protected
# Normalize +data+ into something that can be inserted into the tree.
#
# A NodeSet or a plain Node is returned as-is, a DocumentFragment is replaced by its
# children, and a String is parsed with #fragment and replaced by the resulting children.
# A Document, an Attr, or any other object raises ArgumentError.
def coerce(data)
  refuse = lambda do
    raise ArgumentError, <<~EOERR
      Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
      (You probably want to select a node from the Document with at() or search(), or create a new Node via Node.new().)
    EOERR
  end

  case data
  when XML::NodeSet then data
  when XML::DocumentFragment then data.children
  when String then fragment(data).children
  when Document, XML::Attr then refuse.call # nodes, but never insertable
  when XML::Node then data
  else refuse.call
  end
end
private
# Normalize a keyword argument into an Array of keywords.
#
# A String is split on whitespace; any Enumerable is converted with +to_a+ so that
# callers can safely combine the result with other Arrays (a Set or an Enumerator
# would otherwise make Array#- and Array#+ raise TypeError). An Array is returned as-is.
#
# Raises ArgumentError for anything that is neither a String nor an Enumerable.
def keywordify(keywords)
  case keywords
  when Enumerable
    keywords.to_a
  when String
    keywords.scan(/\S+/)
  else
    raise ArgumentError,
      "Keyword attributes must be passed as either a String or an Enumerable, but received #{keywords.class}"
  end
end
# Insert +node_or_tags+ next to this node, on the side named by +next_or_previous+
# (+:next+ inserts after; any other value inserts before).
#
# The argument is first coerced by the parent. A single node is inserted directly.
# A NodeSet is inserted one node at a time around a pivot: this node itself, or,
# when this node is a text node, a temporary "dummy" node that is unlinked afterwards.
#
# Returns the coerced +node_or_tags+. Raises if this node has no parent.
def add_sibling(next_or_previous, node_or_tags)
  raise("Cannot add sibling to a node with no parent") unless parent

  after = next_or_previous == :next
  impl = after ? :add_next_sibling_node : :add_previous_sibling_node
  # Inserting after the pivot has to walk the set backwards to keep document order.
  iter = after ? :reverse_each : :each

  node_or_tags = parent.coerce(node_or_tags)
  unless node_or_tags.is_a?(XML::NodeSet)
    send(impl, node_or_tags)
    return node_or_tags
  end

  pivot = self
  if text?
    pivot = Nokogiri::XML::Node.new("dummy", document)
    send(impl, pivot)
  end
  node_or_tags.send(iter) { |sibling| pivot.send(impl, sibling) }
  pivot.unlink if text?

  node_or_tags
end
# Result of Nokogiri.uses_libxml?("~> 2.6.0"), evaluated once at load time. When truthy,
# #to_format and #write_format_to skip the regular serializer and emit #dump_html instead.
# NOTE(review): presumably this flags libxml2 2.6.x builds whose save API misbehaves - confirm.
USING_LIBXML_WITH_BROKEN_SERIALIZATION = Nokogiri.uses_libxml?("~> 2.6.0").freeze
private_constant :USING_LIBXML_WITH_BROKEN_SERIALIZATION
# Serialize this node using +save_option+ as the default +:save_with+ value.
#
# An explicit +:save_with+ in +options+ wins over +save_option+. The caller's +options+
# Hash is left untouched, so the same Hash can be reused for a different format (or be
# frozen) without the first default sticking to it.
#
# When USING_LIBXML_WITH_BROKEN_SERIALIZATION is set, #dump_html is returned instead.
def to_format(save_option, options)
  return dump_html if USING_LIBXML_WITH_BROKEN_SERIALIZATION

  save_with = options[:save_with] || save_option
  serialize(options.merge(save_with: save_with))
end
# Write this node to +io+ using +save_option+ as the default +:save_with+ value.
#
# An explicit +:save_with+ in +options+ wins over +save_option+. The caller's +options+
# Hash is left untouched, so the same Hash can be reused for a different format (or be
# frozen) without the first default sticking to it.
#
# When USING_LIBXML_WITH_BROKEN_SERIALIZATION is set, #dump_html is appended to +io+ instead.
def write_format_to(save_option, io, options)
  return (io << dump_html) if USING_LIBXML_WITH_BROKEN_SERIALIZATION

  save_with = options[:save_with] || save_option
  write_to(io, options.merge(save_with: save_with))
end
# Names of the readers listed for this node: name, namespace, attribute nodes and children.
# NOTE(review): presumably consumed by the pretty-printing mixin (PP::Node) - confirm.
def inspect_attributes
  %i[name namespace attribute_nodes children]
end
# XPath prefix list containing only ".//" (descendants of the current node).
# NOTE(review): presumably read by the Searchable mixin when turning CSS queries into
# XPath relative to this node - confirm against Searchable.
IMPLIED_XPATH_CONTEXTS = [".//"].freeze
# Add +node+ as a child, then re-create every attribute of +node+ whose name contains
# a colon: the attribute node is removed and the same name/value pair is assigned again
# through Node#[]=.
# NOTE(review): presumably this lets prefixed attributes pick up namespaces that are in
# scope at the new location - confirm.
def add_child_node_and_reparent_attrs(node)
  add_child_node(node)

  prefixed = node.attribute_nodes.select { |candidate| candidate.name.include?(":") }
  prefixed.each do |attr_node|
    attr_node.remove
    node[attr_node.name] = attr_node.value
  end
end
end
end
end
require_relative "node/save_options"