require "asciidoctor"
require "tempfile"
require "uuidtools"
require "htmlentities"
require "nokogiri"

module Metanorma
  # Stateless helper methods, exposed as singleton methods of this module.
  module Utils
    # Body of a regexp character class: characters that to_ncname replaces
    # with "_". The doubled backslash keeps \u005b ("[") as a regexp escape,
    # so that no bare bracket ends up inside the character class.
    NAMECHAR = "\u0000-\u002c\u002f\u003a-\u0040\\u005b-\u005e" \
               "\u0060\u007b-\u00b6\u00b8-\u00bf\u00d7\u00f7\u037e" \
               "\u2000-\u200b" \
               "\u200e-\u203e\u2041-\u206f\u2190-\u2bff\u2ff0-\u3000".freeze

    # Body of a regexp character class: characters that to_ncname accepts
    # inside a name but prefixes with "_" when they come first. The doubled
    # backslash keeps \u002d ("-") from being read as a range operator.
    NAMESTARTCHAR = "\\u002d\u002e\u0030-\u0039\u00b7\u0300-\u036f" \
                    "\u203f-\u2040".freeze

    # Host document in which the noko* helpers and to_xhtml_fragment create
    # their fragments.
    # NOTE(review): the heredoc body was empty in the reviewed text (markup
    # appears to have been stripped from it); it is reconstructed here as an
    # XHTML skeleton -- confirm against the repository copy.
    NOKOHEAD = <<~HERE.freeze
      <!DOCTYPE html SYSTEM
      "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
      <html xmlns="http://www.w3.org/1999/xhtml">
      <head> <title></title> <meta charset="UTF-8" /> </head>
      <body> </body> </html>
    HERE

    class << self
      # Drops nil-valued entries from the attributes hash and decodes HTML
      # entities in every String value; other values pass through untouched.
      def attr_code(attributes)
        coder = HTMLEntities.new
        attributes.compact.transform_values do |v|
          v.is_a?(String) ? coder.decode(v) : v
        end
      end

      # Turns tag into a name built only from permitted characters.
      # With asciionly (the default) the tag is first entity-encoded
      # (:basic, :hexadecimal); the "&", "#" and ";" of the resulting
      # references are then themselves replaced by "_".
      # A leading character in NAMECHAR (or "#") becomes "_"; a leading
      # character in NAMESTARTCHAR (or "#") is kept but prefixed with "_".
      # Every later character in NAMECHAR (or "#") becomes "_".
      def to_ncname(tag, asciionly: true)
        asciionly and tag = HTMLEntities.new.encode(tag, :basic, :hexadecimal)
        start = tag[0]
        ret1 = if %r([#{NAMECHAR}#])o.match?(start)
                 "_"
               else
                 (%r([#{NAMESTARTCHAR}#])o.match?(start) ? "_#{start}" : start)
               end
        # both halves are nil for an empty tag, hence the "" fallbacks
        ret2 = tag[1..-1] || ""
        (ret1 || "") + ret2.gsub(%r([#{NAMECHAR}#])o, "_")
      end

      # Returns node.id when it is present and non-empty; otherwise a fresh
      # "_"-prefixed random UUID. The UUID is generated only when needed.
      def anchor_or_uuid(node = nil)
        id = node&.id
        return id unless id.nil? || id.empty?

        "_#{UUIDTools::UUID.random_create}"
      end

      # block for processing XML document fragments as XHTML,
      # to allow for HTMLentities
      # Unescape special chars used in Asciidoctor substitution processing
      #
      # The block receives a Nokogiri builder attached to an empty fragment.
      # The fragment is serialised as US-ASCII without indentation and
      # returned as an array of lines: a newline directly after ">" is
      # removed, any other line-final newline becomes a space, or nothing
      # when script is Hans, Hant or Jpan.
      def noko(script = "Latn", &block)
        doc = ::Nokogiri::XML.parse(NOKOHEAD)
        fragment = doc.fragment("")
        ::Nokogiri::XML::Builder.with fragment, &block
        eoln = %w(Hans Hant Jpan).include?(script) ? "" : " "
        fragment.to_xml(encoding: "US-ASCII", indent: 0,
                        save_with: Nokogiri::XML::Node::SaveOptions::AS_XML)
          .lines.map do |l|
          # US-ASCII output carries non-ASCII characters as numeric
          # character references, so the U+0096/U+0097 placeholders have to
          # be matched in escaped form (decimal and hexadecimal); matching
          # the literal characters could never succeed here.
          l.gsub(/>\n$/, ">").gsub(/\n$/m, eoln)
            .gsub("&#150;", "\u0096").gsub("&#151;", "\u0097")
            .gsub("&#x96;", "\u0096").gsub("&#x97;", "\u0097")
        end
      end

      # Like noko, but serialises as UTF-8 and strips every newline together
      # with the whitespace preceding it. Returns an array of lines.
      def noko_html(&block)
        doc = ::Nokogiri::XML.parse(NOKOHEAD)
        fragment = doc.fragment("")
        ::Nokogiri::XML::Builder.with fragment, &block
        fragment.to_xml(encoding: "UTF-8", indent: 0,
                        save_with: Nokogiri::XML::Node::SaveOptions::AS_XML)
          .lines.map { |l| l.gsub(/\s*\n/, "") }
      end

      # Parses the xml string as a fragment of a document built from NOKOHEAD.
      def to_xhtml_fragment(xml)
        doc = ::Nokogiri::XML.parse(NOKOHEAD)
        doc.fragment(xml)
      end

      # Adds the "xmlns:" prefix to names in an XPath expression: after "/",
      # after "::", and at the start of a predicate when the name is followed
      # by "=" (optionally after one space) or by "/", "[" or "]".
      # The leading class is [a-zA-Z_] rather than the range A-z, which would
      # also span the punctuation between "Z" and "a" ("[", "\", "]", "^",
      # "`"); "_" is kept so underscore-initial names still match.
      def ns(xpath)
        xpath.gsub(%r{/([a-zA-Z_])}, "/xmlns:\\1")
          .gsub(%r{::([a-zA-Z_])}, "::xmlns:\\1")
          .gsub(%r{\[([a-zA-Z_][a-z0-9A-Z@/-]* ?=)}, "[xmlns:\\1")
          .gsub(%r{\[([a-zA-Z_][a-z0-9A-Z@/-]*[/\[\]])}, "[xmlns:\\1")
      end

      # Rewrites each entity reference that contains neither whitespace nor
      # "#" (i.e. not already numeric) by decoding it and re-encoding the
      # result in hexadecimal form. All other text is left untouched.
      def numeric_escapes(xml)
        c = HTMLEntities.new
        xml.gsub(/&[^ \r\n\t#;]+;/) do |t|
          c.encode(c.decode(t), :hexadecimal)
        end
      end

      # if the contents of node are blocks, output them to out;
      # else, wrap them in <p>
      def wrap_in_para(node, out)
        if node.blocks? then out << node.content
        else
          out.p { |p| p << node.content }
        end
      end

      # all element/attribute pairs that are ID anchors in Metanorma
      # ("*" in the element position presumably stands for any element)
      def anchor_attributes
        [%w[* id], %w[* bibitemid], %w[review from],
         %w[review to], %w[index to], %w[xref target],
         %w[callout target], %w[location target]]
      end

      # convert definition list term/value pair into Nokogiri XML attribute
      #
      # Finds the dt in dlist whose text is name and copies the text of the
      # first following dd (its p, if there is one) into elem[name].
      # Returns nil without touching elem when no such dt or dd exists.
      def dl_to_attrs(elem, dlist, name)
        e = dlist.at("./dt[text()='#{name}']") or return
        val = e.at("./following::dd/p") || e.at("./following::dd") or return
        elem[name] = val.text
      end

      # convert definition list term/value pairs into Nokogiri XML elements
      #
      # For each dt in dlist whose text is name, the following dd is renamed
      # and moved to sit after the insertion point ins. If elem already has
      # child elements called name, insertion starts after the last of them.
      # Returns the last node inserted (or the starting insertion point).
      def dl_to_elems(ins, elem, dlist, name)
        a = elem.at("./#{name}[last()]")
        ins = a if a
        dlist.xpath("./dt[text()='#{name}']").each do |e|
          ins = dl_to_elems1(e, name, ins)
        end
        ins
      end

      # Moves the value for one dt: takes the first following dd, or its
      # sole p child when that is the dd's only element, renames it to name
      # and places it directly after ins. Returns the node after ins.
      def dl_to_elems1(term, name, ins)
        v = term.at("./following::dd")
        e = v.elements and e.size == 1 && e.first.name == "p" and v = e.first
        v.name = name
        ins.next = v
        ins.next
      end
    end
  end
end