require "nokogiri"
require "pathname"
require "html2doc"
require "asciimath2unitsml"
require_relative "./cleanup_block"
require_relative "./cleanup_footnotes"
require_relative "./cleanup_ref"
require_relative "./cleanup_ref_dl"
require_relative "./cleanup_boilerplate"
require_relative "./cleanup_section"
require_relative "./cleanup_terms"
require_relative "./cleanup_inline"
require_relative "./cleanup_amend"
require "relaton_iev"

module Asciidoctor
  module Standoc
    # Clean-up passes for mathematical content: AsciiMath to MathML
    # conversion, MathML unescaping and namespacing, mathvariant
    # normalisation, and collection of UnitsML definitions.
    module Cleanup
      # Converts AsciiMath stem content in a serialised XML document to MathML.
      #
      # NOTE(review): the stem pattern, the temporary <amathstem> delimiters
      # and the wrapper element are reconstructed; the markup inside these
      # literals was missing from the reviewed copy. Confirm against history.
      #
      # @param text [String] serialised XML
      # @return [String] serialised XML in which every math element sits
      #   inside a stem element
      def asciimath2mathml(text)
        # Entities are decoded first so the converter sees raw AsciiMath.
        text = text.gsub(%r{<stem type="AsciiMath">(.+?)</stem>}m) do
          decoded = HTMLEntities.new.decode(Regexp.last_match(1))
          "<amathstem>#{decoded}</amathstem>"
        end
        text = Html2Doc.asciimath_to_mathml(text,
                                            ["<amathstem>", "</amathstem>"])
        doc = Nokogiri::XML(text)
        # Only math elements not already inside a stem need a wrapper.
        doc.xpath("//*[local-name() = 'math'][not(parent::stem)]").each do |m|
          m.wrap("<stem type='MathML'></stem>")
        end
        doc.to_xml
      end

      # Turns escaped MathML held as text inside a node into real child
      # markup, dropping namespace prefixes from tag names on the way.
      # Does nothing when the node already has element children.
      #
      # NOTE(review): the entity patterns and the final assignment are
      # reconstructed (the reviewed copy had them entity-decoded/truncated).
      #
      # @param xml [Nokogiri::XML::Node]
      # @return [void]
      def xml_unescape_mathml(xml)
        return if xml.children.any?(&:element?)

        # "&amp;" is decoded last so that it cannot create new entities
        # that the earlier substitutions would then decode a second time.
        math = xml.text.gsub(/&lt;/, "<").gsub(/&gt;/, ">")
          .gsub(/&quot;/, '"').gsub(/&apos;/, "'").gsub(/&amp;/, "&")
          .gsub(/<[^: \r\n\t\/]+:/, "<").gsub(/<\/[^ \r\n\t:]+:/, "</")
        xml.children = math
      end

      # NOTE(review): the definition was lost from the reviewed copy; this is
      # the standard MathML namespace URI. Guarded in case another file that
      # reopens this module already defines it.
      unless const_defined?(:MATHML_NS)
        MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
      end

      # Replaces whitespace at the start and end of each line of text
      # content with a no-break space so it survives serialisation.
      #
      # NOTE(review): the element selected (mtext) and the "&#xA0;"
      # replacement are reconstructed; only the tail of this method was
      # present in the reviewed copy.
      #
      # @param math [Nokogiri::XML::Node]
      # @return [void]
      def mathml_preserve_space(math)
        math.xpath(".//m:mtext", "m" => MATHML_NS).each do |x|
          x.children = x.children.to_xml
            .gsub(/^\s/, "&#xA0;").gsub(/\s$/, "&#xA0;")
        end
      end

      # Puts every un-namespaced math child of the stem in the MathML
      # namespace.
      #
      # @param stem [Nokogiri::XML::Node]
      # @return [void]
      def mathml_namespace(stem)
        stem.xpath("./math").each { |x| x.default_namespace = MATHML_NS }
      end

      # Character classes whose single-letter identifiers keep the MathML
      # default (italic) rendering. Read by #mi_italicise?.
      #
      # @return [Hash{Symbol => Boolean}]
      def mathml_mi_italics
        { uppergreek: true, upperroman: true,
          lowergreek: true, lowerroman: true }
      end

      # presuppose multichar mi upright, singlechar mi MathML default italic
      #
      # Sets mathvariant="normal" on every mi that has no ancestor carrying
      # a mathvariant and for which #mi_italicise? answers true.
      #
      # @param xml [Nokogiri::XML::Node]
      # @return [void]
      def mathml_italicise(xml)
        xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
                  "m" => MATHML_NS).each do |i|
          char = HTMLEntities.new.decode(i.text)
          i["mathvariant"] = "normal" if mi_italicise?(char)
        end
      end

      # Decides whether an identifier must be forced to mathvariant="normal":
      # true only for a single Greek or Latin letter whose case/script class
      # is switched off in #mathml_mi_italics.
      #
      # @param char [String] entity-decoded identifier text
      # @return [Boolean]
      def mi_italicise?(char)
        return false if char.length > 1

        if /\p{Greek}/.match?(char)
          (/\p{Lower}/.match?(char) && !mathml_mi_italics[:lowergreek]) ||
            (/\p{Upper}/.match?(char) && !mathml_mi_italics[:uppergreek])
        elsif /\p{Latin}/.match?(char)
          (/\p{Lower}/.match?(char) && !mathml_mi_italics[:lowerroman]) ||
            (/\p{Upper}/.match?(char) && !mathml_mi_italics[:upperroman])
        else
          false
        end
      end

      UNITSML_NS = "https://schema.unitsml.org/unitsml/1.0".freeze

      # Returns the document's misc-container element, creating it directly
      # after termdocsource (or, failing that, bibdata) when absent.
      # Raises NoMethodError if neither anchor element exists.
      #
      # NOTE(review): the inserted "<misc-container/>" literal is
      # reconstructed from the lookup that follows it.
      #
      # @param xmldoc [Nokogiri::XML::Node]
      # @return [Nokogiri::XML::Node]
      def add_misc_container(xmldoc)
        existing = xmldoc.at("//misc-container")
        return existing if existing

        anchor = xmldoc.at("//termdocsource") || xmldoc.at("//bibdata")
        anchor.next = "<misc-container/>"
        xmldoc.at("//misc-container")
      end

      # Moves all UnitsML definitions found in the document into a single
      # container under misc-container. No-op when the document holds no
      # element in the UnitsML namespace.
      #
      # NOTE(review): the container element literal is reconstructed.
      #
      # @param xmldoc [Nokogiri::XML::Node]
      # @return [void]
      def mathml_unitsML(xmldoc)
        return unless xmldoc.at(".//m:*", "m" => UNITSML_NS)

        misc = add_misc_container(xmldoc)
        unitsml = misc.add_child("<UnitsML xmlns='#{UNITSML_NS}'/>").first
        %w(Unit CountedItem Quantity Dimension Prefix).each do |t|
          gather_unitsml(unitsml, xmldoc, t)
        end
      end

      # Detaches every UnitsML element named +tag+ from the document and
      # re-attaches one element per distinct id under a "<tag>Set" child of
      # +unitsml+. When ids repeat, the last occurrence is the one kept.
      #
      # @param unitsml [Nokogiri::XML::Node] destination container
      # @param xmldoc [Nokogiri::XML::Node]
      # @param tag [String] UnitsML element name
      # @return [void]
      def gather_unitsml(unitsml, xmldoc, tag)
        tags = xmldoc.xpath(".//m:#{tag}", "m" => UNITSML_NS)
          .each_with_object({}) do |x, m|
          m[x["id"]] = x.remove
        end
        return if tags.empty?

        set = unitsml.add_child("<#{tag}Set/>").first
        tags.each_value { |v| set << v }
      end

      # Options handed to Asciimath2UnitsML::Conv.new in #mathml_cleanup.
      #
      # @return [Hash]
      def asciimath2unitsml_options
        { multiplier: :space }
      end

      # Combines the mathvariant of an element with that of an enclosing
      # element. Where the two styles compose (e.g. italic inside bold) the
      # composite value is returned; otherwise the inner value is kept.
      #
      # "sans-serif-bold" is not a MathML mathvariant value; it is still
      # accepted for backward compatibility, alongside the valid
      # "bold-sans-serif" which previously fell through uncombined.
      #
      # @param inner [String] mathvariant of the nested element
      # @param outer [String] mathvariant of the enclosing element
      # @return [String] effective mathvariant for the nested element
      def mathvariant_override(inner, outer)
        case outer
        when "bold"
          case inner
          when "normal" then "bold"
          when "italic" then "bold-italic"
          when "fraktur" then "bold-fraktur"
          when "script" then "bold-script"
          when "sans-serif" then "bold-sans-serif"
          when "sans-serif-italic" then "sans-serif-bold-italic"
          else inner
          end
        when "italic"
          case inner
          when "normal" then "italic"
          when "bold" then "bold-italic"
          when "sans-serif" then "sans-serif-italic"
          when "bold-sans-serif" then "sans-serif-bold-italic"
          else inner
          end
        when "bold-italic"
          case inner
          when "normal", "bold", "italic" then "bold-italic"
          when "sans-serif", "bold-sans-serif", "sans-serif-italic"
            "sans-serif-bold-italic"
          else inner
          end
        when "fraktur"
          case inner
          when "normal" then "fraktur"
          when "bold" then "bold-fraktur"
          else inner
          end
        when "bold-fraktur"
          case inner
          when "normal", "fraktur" then "bold-fraktur"
          else inner
          end
        when "script"
          case inner
          when "normal" then "script"
          when "bold" then "bold-script"
          else inner
          end
        when "bold-script"
          case inner
          when "normal", "script" then "bold-script"
          else inner
          end
        when "sans-serif"
          case inner
          when "normal" then "sans-serif"
          when "bold" then "bold-sans-serif"
          when "italic" then "sans-serif-italic"
          when "bold-italic" then "sans-serif-bold-italic"
          else inner
          end
        when "bold-sans-serif"
          case inner
          when "normal", "bold", "sans-serif" then "bold-sans-serif"
          when "italic", "bold-italic", "sans-serif-italic"
            "sans-serif-bold-italic"
          else inner
          end
        when "sans-serif-italic"
          case inner
          when "normal", "italic", "sans-serif" then "sans-serif-italic"
          when "bold", "bold-italic", "bold-sans-serif", "sans-serif-bold"
            "sans-serif-bold-italic"
          else inner
          end
        when "sans-serif-bold-italic"
          case inner
          when "normal", "italic", "sans-serif", "sans-serif-italic",
               "bold", "bold-italic", "bold-sans-serif", "sans-serif-bold"
            "sans-serif-bold-italic"
          else inner
          end
        else inner
        end
      end

      # Pushes the mathvariant of every element down onto each descendant
      # that carries its own mathvariant, using #mathvariant_override.
      #
      # @param math [Nokogiri::XML::Node]
      # @return [void]
      def mathml_mathvariant(math)
        math.xpath(".//*[@mathvariant]").each do |outer|
          outer.xpath(".//*[@mathvariant]").each do |inner|
            # Argument order follows the (inner, outer) signature. It used to
            # be reversed, which made the "else inner" fallback replace an
            # uncombinable inner variant with the outer one.
            inner["mathvariant"] =
              mathvariant_override(inner["mathvariant"], outer["mathvariant"])
          end
        end
      end

      # Runs all MathML clean-up steps on each stem of type MathML, then
      # gathers the UnitsML definitions for the document as a whole.
      #
      # @param xmldoc [Nokogiri::XML::Node]
      # @return [void]
      def mathml_cleanup(xmldoc)
        unitsml = Asciimath2UnitsML::Conv.new(asciimath2unitsml_options)
        xmldoc.xpath("//stem[@type = 'MathML']").each do |x|
          xml_unescape_mathml(x)
          mathml_namespace(x)
          mathml_preserve_space(x)
          unitsml.MathML2UnitsML(x)
          mathml_mathvariant(x)
          mathml_italicise(x)
        end
        mathml_unitsML(xmldoc)
      end
    end
  end
end