require 'xml/mixup/version'
require 'nokogiri'
require 'set'
module XML::Mixup
#
# these are node attachment protocols
private
# Attachment strategies keyed by the kind of adjacent node. Every entry
# is a two-argument lambda taking the node to attach followed by the
# adjacent node it is attached relative to.
#
# NOTE: a bare `private` only changes the visibility of methods defined
# after it; it does not hide constants.
ADJACENT = {
  # Attach `node` underneath `parent`.
  parent: ->(node, parent) {
    if parent.node_type == 9 && node.node_type == 1
      # an element (type 1) going into a document (type 9) becomes the root
      parent.root = node
    elsif node.node_type == 11
      # a document fragment (type 11) is emptied into the parent child
      # by child rather than being attached itself
      node.children.each { |child| parent.add_child(child) }
    else
      parent.add_child(node)
    end
  },
  # Insert `node` immediately ahead of `sibling`.
  before: ->(node, sibling) { sibling.add_previous_sibling(node) },
  # Insert `node` immediately behind `sibling`.
  after: ->(node, sibling) { sibling.add_next_sibling(node) },
  # Put `node` in the place of `target`.
  replace: ->(node, target) { target.replace(node) },
}.freeze
# Spec keys with a built-in meaning. `markup` consults this set so that
# these keys are never mistaken for an element name encoded after the
# leading octothorpe.
RESERVED = Set[
  '#comment', '#cdata', '#doctype', '#dtd', '#elem', '#element',
  '#pi', '#processing-instruction', '#tag'
].freeze
public
# Generate a handy blank document.
#
# @param version [Numeric, nil]
#
# @return [Nokogiri::XML::Document] a Nokogiri XML document.
def xml_doc version = nil
  # The version is documented as Numeric, but Nokogiri expects the XML
  # version as a String (nil selects its default), so coerce anything
  # non-nil instead of handing a bare number through.
  Nokogiri::XML::Document.new(version.nil? ? version : version.to_s)
end
# Generates an XML tree from a given specification.
#
# require 'xml-mixup'
#
# class Anything
# include XML::Mixup
# end
#
# something = Anything.new
#
# # generate a structure
# node = something.markup spec: [
# { '#pi' => 'xml-stylesheet', type: 'text/xsl', href: '/transform' },
# { '#dtd' => :html },
# { '#html' => [
# { '#head' => [
# { '#title' => 'look ma, title' },
# { '#elem' => :base, href: 'http://the.base/url' },
# ] },
# { '#body' => [
# { '#h1' => 'Illustrious Heading' },
# { '#p' => :lolwut },
# ] },
# ], xmlns: 'http://www.w3.org/1999/xhtml' }
# ]
#
# # `node` will correspond to the last thing generated. In this
# # case, it will be a text node containing 'lolwut'.
#
# doc = node.document
# puts doc.to_xml
#
# @param spec [Hash, Array, Nokogiri::XML::Node, Proc, #to_s] An XML
# tree specification. May be composed of multiple hashes and
# arrays. See the spec spec.
#
# @param doc [Nokogiri::XML::Document, nil] an optional XML document
# instance; will be supplied if none given.
#
# @param args [#to_a] Any arguments to be passed to any callbacks
# anywhere in the spec. Assumed to be an array.
#
# @param parent [Nokogiri::XML::Node] The node under which the
# evaluation result of the spec is to be attached. This is the
# default adjacent node, which in turn defaults to the document if
# neither it nor any other adjacent node is given. Conflicts with
# other adjacent nodes.
#
# @param before [Nokogiri::XML::Node] This represents a _sibling_
# node which the spec is to be inserted _before_. Conflicts with
# other adjacent nodes.
#
# @param after [Nokogiri::XML::Node] This represents a _sibling_
# node which the spec is to be inserted _after_. Conflicts with
# other adjacent nodes.
#
# @param replace [Nokogiri::XML::Node] This represents a _sibling_
# node which the spec is intended to _replace_. Conflicts with
# other adjacent nodes.
#
# @return [Nokogiri::XML::Node] the last node generated, in document
# order. Will return a {Nokogiri::XML::Document} when called
# without arguments.
def markup spec: nil, doc: nil, args: [], **nodes
  # `nodes` collects the adjacent-node keywords (parent, before, after,
  # replace) plus the internal `pseudo` keyword used when recursing.
  # Explicit keywords such as `args` never end up in `nodes`.

  # handle adjacent node declaration: at most one may be supplied
  adj = nil
  ADJACENT.keys.each do |k|
    next unless nodes[k]
    raise "Cannot bind to #{k}: #{adj} is already present" if adj
    unless nodes[k].is_a? Nokogiri::XML::Node
      raise "#{k} must be an XML node"
    end
    adj = k
  end

  # generate doc/parent
  if adj
    doc ||= nodes[adj].document
    unless adj.to_sym == :parent
      # sibling-style attachment needs an actual parent to work within
      unless (nodes[:parent] = nodes[adj].parent)
        raise "#{adj} node must have a parent node!"
      end
    end
  else
    # no adjacent node given: attach to the (possibly new) document
    doc ||= Nokogiri::XML::Document.new
    nodes[adj = :parent] ||= doc
  end

  # if nothing gets generated, the adjacent node itself is returned
  node = nodes[adj]

  # dispatch based on spec type
  if spec and not (spec.respond_to? :empty? and spec.empty?)
    spec = spec.to_a if spec.is_a? Nokogiri::XML::NodeSet

    if spec.is_a? Array
      # non-parent attachments are assembled in a fragment first
      par = adj == :parent ? nodes[:parent] : doc.fragment
      # keep the namespace context handed down by an outer call, if any,
      # so nested arrays do not replace it with the fragment
      context = nodes[:pseudo] || nodes[:parent]

      out = spec.map do |x|
        # `args` has to be forwarded explicitly: it is a named keyword
        # and therefore is not present in `nodes`
        markup(spec: x, parent: par, pseudo: context, doc: doc, args: args)
      end

      # only run this if there is something to run
      if out.length > 0
        # this is already attached if the adjacent node is the parent
        ADJACENT[adj].call(par, nodes[adj]) unless adj == :parent
        node = out.last
      end
      # node otherwise defaults to adjacent
    elsif spec.respond_to? :call
      # handle proc/lambda/whatever: evaluate it and mark up the result
      # relative to the same adjacent node, keeping the namespace context
      node = markup(spec: spec.call(*args), args: args, doc: doc,
                    pseudo: nodes[:pseudo], adj => nodes[adj])
    elsif spec.is_a? Hash
      # maybe element, maybe something else

      # find the nil key which should contain a viable node name
      # (and maybe children)
      name = nil
      children = []
      if x = spec[nil]
        if x.respond_to? :to_a
          # destructure rather than shift: `to_a` on an Array returns the
          # caller's own object, which must not be modified
          name, *children = x.to_a
        else
          name = x
        end
      elsif (compact = spec.select { |k, _|
               k.respond_to?(:to_a) or k.is_a?(Nokogiri::XML::Node) }) and
            not compact.empty?
        # compact syntax eliminates the `nil` key
        raise %q{Spec can't have duplicate compact keys} if compact.count > 1
        children, name = compact.first
        children = children.respond_to?(:to_a) ? children.to_a : [children]
      elsif (special = spec.select { |k, _|
               k.respond_to? :to_s and k.to_s.start_with? '#' }) and
            not special.empty?
        # these are special keys
        raise %q{Spec can't have multiple special keys} if special.count > 1
        name, children = special.first
        # keys are selected via `to_s`, so compare them as strings too;
        # otherwise a symbol like :'#comment' would miss every check below
        name = name.to_s

        if %w{# #elem #element #tag}.include? name
          # then the name is in the `children` slot
          if children.nil? or children.to_s.empty?
            raise "Value of #{name} shorthand formulation " \
                  "must be a valid element name"
          end
          name = children
          # set children to empty array
          children = []
        elsif not RESERVED.include? name
          # then the name is encoded into the key and we have to
          # remove the octothorpe
          name = name[1..-1]
        end

        # don't forget to reset the child nodes
        children = children.respond_to?(:to_a) ? children.to_a : [children]
      end

      # note the name can be nil because it can be inferred

      # now we pull out "attributes" which are the rest of the keys;
      # these should be amenable to being turned into symbols
      attr = spec.select { |k, _|
        k and k.respond_to? :to_sym and not k.to_s.start_with? '#'
      }.transform_keys(&:to_sym)

      # now we dispatch based on the name
      if name == '#comment'
        # first up, comments
        node = doc.create_comment flatten(children, args).to_s

        # attach it
        ADJACENT[adj].call node, nodes[adj]
      elsif name == '#pi' or name == '#processing-instruction'
        # now processing instructions
        if children.empty?
          raise "Processing instruction must have at least a target"
        end
        target = children[0]
        content = ''
        if (c = children[1..children.length]) and c.length > 0
          # explicit content follows the target
          content = flatten(c, args).to_s
        else
          # otherwise build pseudo-attributes from the remaining keys
          content = attr.sort.map { |pair|
            v = flatten(pair[1], args) or next
            "#{pair[0].to_s}=\"#{v}\""
          }.compact.join(' ')
        end

        node = Nokogiri::XML::ProcessingInstruction.new(doc, target, content)

        # attach it
        ADJACENT[adj].call node, nodes[adj]
      elsif name == '#dtd' or name == '#doctype'
        # now doctype declarations
        if children.empty?
          raise "DTD node must have a root element declaration"
        end

        # assign as if these are args
        root, pub, sys = children
        # supplant with attributes if present
        pub ||= attr[:public] if attr[:public]
        sys ||= attr[:system] if attr[:system]

        # XXX for some reason this is an *internal* subset?
        # anyway these may not be strings and upstream is not forgiving
        node = doc.create_internal_subset(root.to_s,
                                          pub.nil? ? pub : pub.to_s,
                                          sys.nil? ? sys : sys.to_s)

        # at any rate it doesn't have to be explicitly attached
      elsif name == '#cdata'
        # let's not forget cdata sections
        node = doc.create_cdata flatten(children, args)

        # attach it
        ADJACENT[adj].call node, nodes[adj]
      else
        # finally, an element
        raise 'Element name inference NOT IMPLEMENTED' unless name

        # first check the name
        prefix = local = nil
        if name and (md = /^(?:([^:]+):)?(.+)/.match(name.to_s))
          # XXX match actual qname/ncname here
          prefix, local = md.captures
        end

        # next pull apart the namespaces and ordinary attributes
        ns = {}
        at = {}
        attr.each do |k, v|
          k = k.to_s
          v = flatten(v, args) or next
          if (md = /^xmlns(?::(.*))?$/i.match(k))
            ns[md[1]] = v
          else
            at[k] = v
          end
        end

        # now go over the attributes and set any missing namespaces to nil
        at.keys.each do |k|
          pfx, _ = /^(?:([^:]+):)?(.+)$/.match(k).captures
          ns[pfx] ||= nil
        end
        # also do the tag prefix but only if there is a local name
        ns[prefix] ||= nil if local

        # unconditionally remove ns['xml'], we never want it in there
        ns.delete 'xml'

        # pseudo is a stand-in for non-parent adjacent nodes
        pseudo = nodes[:pseudo] || nodes[:parent]

        # now get the final namespace mapping
        ns.keys.each do |k|
          pk = k ? "xmlns:#{k}" : "xmlns"
          if pseudo.namespaces.has_key? pk
            ns[k] ||= pseudo.namespaces[pk]
          end
        end
        # delete nil => nil
        if ns.has_key? nil and ns[nil].nil?
          ns.delete(nil)
        end

        # there should be no nil namespace declarations now
        if ns.has_value? nil
          raise "INTERNAL ERROR: nil namespace declaration: #{ns}"
        end

        # generate the node
        node = element name, doc: doc, ns: ns, attr: at, args: args

        # attach it
        ADJACENT[adj].call node, nodes[adj]

        # don't forget the children!
        if children.length > 0
          node = markup(spec: children, doc: doc, parent: node, args: args)
        end
      end
    else
      if spec.is_a? Nokogiri::XML::Node
        # existing node: attach a deep copy, leaving the original alone
        node = spec.dup 1
      else
        # text node
        node = doc.create_text_node spec.to_s
      end

      # attach it
      ADJACENT[adj].call node, nodes[adj]
    end
  end

  # return the node
  node
end
# Generates an XHTML stub, with optional RDFa attributes. All
# parameters are optional.
#
# *This method is still under development.* I am still trying to
# figure out how I want it to behave. Some parts may not work as
# advertised.
#
# @param doc [Nokogiri::XML::Document, nil] an optional document.
#
# @param base [#to_s] the contents of +