lib/utils/xml.rb in metanorma-utils-1.4.4.2 vs lib/utils/xml.rb in metanorma-utils-1.4.5
- old
+ new
@@ -4,16 +4,23 @@
require "htmlentities"
require "nokogiri"
module Metanorma
module Utils
- NAMECHAR = "\u0000-\u002c\u002f\u003a-\u0040\\u005b-\u005e"\
- "\u0060\u007b-\u00b6\u00b8-\u00bf\u00d7\u00f7\u037e"\
- "\u2000-\u200b"\
+ NAMECHAR = "\u0000-\u002c\u002f\u003a-\u0040\\u005b-\u005e" \
+ "\u0060\u007b-\u00b6\u00b8-\u00bf\u00d7\u00f7\u037e" \
+ "\u2000-\u200b" \
"\u200e-\u203e\u2041-\u206f\u2190-\u2bff\u2ff0-\u3000".freeze
- NAMESTARTCHAR = "\\u002d\u002e\u0030-\u0039\u00b7\u0300-\u036f"\
+ NAMESTARTCHAR = "\\u002d\u002e\u0030-\u0039\u00b7\u0300-\u036f" \
"\u203f-\u2040".freeze
+ # Skeleton XHTML document (strict DTD doctype, XHTML default namespace,
+ # empty head and body). noko, noko_html and to_xhtml_fragment parse it
+ # with Nokogiri to obtain the document from which they create fragments.
+ NOKOHEAD = <<~HERE.freeze
+ <!DOCTYPE html SYSTEM
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+ <html xmlns="http://www.w3.org/1999/xhtml">
+ <head> <title></title> <meta charset="UTF-8" /> </head>
+ <body> </body> </html>
+ HERE
class << self
def to_ncname(tag, asciionly: true)
asciionly and tag = HTMLEntities.new.encode(tag, :basic, :hexadecimal)
start = tag[0]
@@ -29,18 +36,10 @@
# Return the id of +node+ when it has a non-empty one; otherwise return a
# freshly generated random UUID prefixed with an underscore.
#
# @param node [#id, nil] object responding to +id+, or nil
# @return [String] the node's id, or "_<uuid>" as a fallback
def anchor_or_uuid(node = nil)
  generated = "_#{UUIDTools::UUID.random_create}"
  return generated if node.nil?

  id = node.id
  return generated if id.nil? || id.empty?

  node.id
end
- NOKOHEAD = <<~HERE.freeze
- <!DOCTYPE html SYSTEM
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
- <html xmlns="http://www.w3.org/1999/xhtml">
- <head> <title></title> <meta charset="UTF-8" /> </head>
- <body> </body> </html>
- HERE
-
# block for processing XML document fragments as XHTML,
# to allow for HTMLentities
# Unescape special chars used in Asciidoctor substitution processing
def noko(&block)
doc = ::Nokogiri::XML.parse(NOKOHEAD)
@@ -53,14 +52,38 @@
.gsub("—", "\u0097").gsub("–", "\u0096")
.gsub("—", "\u0097")
end
end
+ def noko_html(&block)
+ doc = ::Nokogiri::XML.parse(NOKOHEAD)
+ fragment = doc.fragment("")
+ ::Nokogiri::XML::Builder.with fragment, &block
+ fragment.to_xml(encoding: "US-ASCII").lines.map do |l|
+ l.gsub(/\s*\n/, "")
+ end
+ end
+
+ def to_xhtml_fragment(xml)
+ doc = ::Nokogiri::XML.parse(NOKOHEAD)
+ doc.fragment(xml)
+ end
+
# Insert the "xmlns:" prefix before names in an XPath expression.
#
# A name is prefixed when it directly follows "/" or the axis separator
# "::", or when it opens a predicate ("[") and is followed either by an
# optional space and "=", or by "/", "[" or "]".
#
# A name is recognised by its first character, which must be an ASCII
# letter or an underscore. The class is spelled "a-zA-Z_" rather than
# "a-zA-z": the latter range also covers the punctuation between "Z" and
# "a" in ASCII ("[", "\", "]", "^", "`"), which would then be prefixed.
#
# @param xpath [String] XPath expression with unprefixed names
# @return [String] the expression with "xmlns:" prefixes inserted
def ns(xpath)
  xpath.gsub(%r{/([a-zA-Z_])}, "/xmlns:\\1")
    .gsub(%r{::([a-zA-Z_])}, "::xmlns:\\1")
    .gsub(%r{\[([a-zA-Z_][a-z0-9A-Z@/-]* ?=)}, "[xmlns:\\1")
    .gsub(%r{\[([a-zA-Z_][a-z0-9A-Z@/-]*[/\[\]])}, "[xmlns:\\1")
end
+
+ def numeric_escapes(xml)
+ c = HTMLEntities.new
+ xml.split(/(&[^ \r\n\t#;]+;)/).map do |t|
+ if /^(&[^ \t\r\n#;]+;)/.match?(t)
+ c.encode(c.decode(t), :hexadecimal)
+ else t
+ end
+ end.join
end
end
end
end