require "asciimath2unitsml"

module Metanorma
  module Standoc
    # Cleanup steps for <stem> (mathematical) content: conversion of AsciiMath
    # to MathML, normalisation of the resulting MathML, and gathering of
    # UnitsML definitions into a single container.
    #
    # This module relies on names that are not defined here and must be
    # supplied by the including class or sibling files: +@c+ (an object
    # responding to +decode+), +to_xml+, +MATHML_NS+, +Html2Doc+ and Nokogiri.
    #
    # NOTE(review): the copy of this file under review had XML markup stripped
    # from several string/regex literals. Those literals have been
    # reconstructed from how the rest of the module uses them; each one is
    # flagged below and should be confirmed against version control.
    module Cleanup
      # Converts every AsciiMath stem in +text+ (an XML string) to MathML.
      #
      # The stem content is entity-decoded with +@c.decode+ and re-wrapped in
      # temporary delimiters, which are then handed to
      # Html2Doc#asciimath_to_mathml. The result is post-processed by
      # #asciimath2mathml_wrap.
      #
      # @param text [String] serialised XML
      # @return whatever +to_xml+ returns for the rewritten document
      def asciimath2mathml(text)
        # NOTE(review): source pattern and <amathstem> delimiters are
        # reconstructed -- confirm.
        text = text.gsub(%r{<stem type="AsciiMath">(.+?)</stem>}m) do
          "<amathstem>#{@c.decode(Regexp.last_match(1))}</amathstem>"
        end
        text = Html2Doc.new({})
          .asciimath_to_mathml(text, ["<amathstem>", "</amathstem>"],
                               retain_asciimath: true)
        asciimath2mathml_wrap(text)
      end

      # Wraps every +math+ element that is not already the child of a +stem+
      # in a MathML stem, then moves an immediately following +asciimath+
      # sibling element inside that stem.
      #
      # @param text [String] serialised XML
      # @return whatever +to_xml+ returns for the rewritten document
      def asciimath2mathml_wrap(text)
        x = Nokogiri::XML(text)
        x.xpath("//*[local-name() = 'math'][not(parent::stem)]").each do |y|
          # NOTE(review): wrapper markup reconstructed; #mathml_cleanup
          # selects on stem[@type = 'MathML'] -- confirm.
          y.wrap("<stem type='MathML'></stem>")
        end
        x.xpath("//stem").each do |y|
          y << y.next_element if y.next_element&.name == "asciimath"
        end
        to_xml(x)
      end

      # If +xml+ has no element children (i.e. its MathML is present only as
      # escaped text), unescapes the five predefined XML entities, strips
      # namespace prefixes from opening and closing tags, and replaces the
      # node's children with the resulting markup.
      #
      # @param xml [Nokogiri::XML::Node] a stem element
      # @return [void]
      def xml_unescape_mathml(xml)
        return if xml.children.any?(&:element?)

        # &amp; is decoded last so that e.g. "&amp;lt;" is not decoded twice.
        math = xml.text.gsub(/&lt;/, "<").gsub(/&gt;/, ">")
          .gsub(/&quot;/, '"').gsub(/&apos;/, "'").gsub(/&amp;/, "&")
          .gsub(/<[^: \r\n\t\/]+:/, "<").gsub(/<\/[^ \r\n\t:]+:/, "</")
        xml.children = math
      end

      # Replaces a leading or trailing whitespace character in the selected
      # MathML text elements with a non-breaking space reference.
      #
      # NOTE(review): the method header, the selected element (+mtext+) and
      # the replacement string were lost in the reviewed copy and are
      # reconstructed -- confirm.
      #
      # @param math [Nokogiri::XML::Node] a stem element
      # @return [void]
      def mathml_preserve_space(math)
        math.xpath(".//m:mtext", "m" => MATHML_NS).each do |x|
          x.children = x.children.to_xml
            .gsub(/^\s/, "&#xA0;").gsub(/\s$/, "&#xA0;")
        end
      end

      # Assigns the MathML namespace as default namespace to each +math+
      # child of +stem+ that is currently in no namespace.
      #
      # @param stem [Nokogiri::XML::Node]
      # @return [void]
      def mathml_namespace(stem)
        stem.xpath("./math").each { |x| x.default_namespace = MATHML_NS }
      end

      # Per character class: whether a single-character +mi+ keeps the MathML
      # default italic rendering. Written as a method so it can be overridden.
      #
      # @return [Hash{Symbol => Boolean}]
      def mathml_mi_italics
        { uppergreek: true, upperroman: true,
          lowergreek: true, lowerroman: true }
      end

      # Presupposes that a multi-character +mi+ is upright and a
      # single-character +mi+ takes the MathML default of italic. Any +mi+
      # without a mathvariant on an ancestor, and for which #mi_italicise?
      # holds, is forced upright with mathvariant="normal".
      #
      # @param xml [Nokogiri::XML::Node]
      # @return [void]
      def mathml_italicise(xml)
        xml.xpath(".//m:mi[not(ancestor::*[@mathvariant])]",
                  "m" => MATHML_NS).each do |i|
          char = @c.decode(i.text)
          i["mathvariant"] = "normal" if mi_italicise?(char)
        end
      end

      # True when +char+ is a single Greek or Latin letter whose class is
      # switched off in #mathml_mi_italics, i.e. the letter must be marked
      # upright explicitly. Despite the name, a true result means "do not
      # leave italic".
      #
      # @param char [String]
      # @return [Boolean]
      def mi_italicise?(char)
        return false if char.length > 1

        italics = mathml_mi_italics
        lower = /\p{Lower}/.match?(char)
        upper = /\p{Upper}/.match?(char)
        case char
        when /\p{Greek}/
          (lower && !italics[:lowergreek]) || (upper && !italics[:uppergreek])
        when /\p{Latin}/
          (lower && !italics[:lowerroman]) || (upper && !italics[:upperroman])
        else
          false
        end
      end

      UNITSML_NS = "https://schema.unitsml.org/unitsml/1.0".freeze

      # Returns the document's +metanorma-extension+ element, creating it
      # immediately after +termdocsource+ (or, failing that, +bibdata+) when
      # it does not exist yet.
      #
      # @param xmldoc [Nokogiri::XML::Document]
      # @return [Nokogiri::XML::Node]
      def add_misc_container(xmldoc)
        ins = xmldoc.at("//metanorma-extension")
        return ins if ins

        a = xmldoc.at("//termdocsource") || xmldoc.at("//bibdata")
        # NOTE(review): inserted markup reconstructed from the lookup on the
        # next line -- confirm.
        a.next = "<metanorma-extension/>"
        xmldoc.at("//metanorma-extension")
      end

      # If the document contains any element in the UnitsML namespace,
      # creates one UnitsML container under +metanorma-extension+ and moves
      # all Unit, CountedItem, Quantity, Dimension and Prefix elements into
      # per-type sets inside it.
      #
      # @param xmldoc [Nokogiri::XML::Document]
      # @return [void]
      def mathml_unitsML(xmldoc)
        return unless xmldoc.at(".//m:*", "m" => UNITSML_NS)

        misc = add_misc_container(xmldoc)
        # NOTE(review): container markup reconstructed -- confirm.
        unitsml = misc.add_child("<UnitsML xmlns='#{UNITSML_NS}'/>").first
        %w(Unit CountedItem Quantity Dimension Prefix).each do |t|
          gather_unitsml(unitsml, xmldoc, t)
        end
      end

      # Detaches every UnitsML element named +tag+ from +xmldoc+, keeps one
      # per +id+ (the last one encountered wins), and appends them to a new
      # "<tag>Set" child of +unitsml+. Does nothing when none are found.
      #
      # @param unitsml [Nokogiri::XML::Node] UnitsML container
      # @param xmldoc [Nokogiri::XML::Document]
      # @param tag [String] local element name
      # @return [void]
      def gather_unitsml(unitsml, xmldoc, tag)
        tags = xmldoc.xpath(".//m:#{tag}", "m" => UNITSML_NS)
          .each_with_object({}) do |x, m|
          m[x["id"]] = x.remove
        end
        return if tags.empty?

        set = unitsml.add_child("<#{tag}Set/>").first
        tags.each_value { |v| set << v }
      end

      # Options passed to Asciimath2UnitsML::Conv.new in #mathml_cleanup.
      #
      # @return [Hash]
      def asciimath2unitsml_options
        { multiplier: :space }
      end

      # Lookup table MATHVARIANT_OVERRIDE[outer][inner] giving the
      # mathvariant an inner element should carry when nested inside an
      # element whose mathvariant is +outer+. Combinations not listed leave
      # the inner value unchanged (see #mathvariant_override).
      #
      # "sans-serif-bold" is not a MathML mathvariant value; it is retained
      # only for backward compatibility next to the valid "bold-sans-serif".
      MATHVARIANT_OVERRIDE = {
        bold: { normal: "bold", italic: "bold-italic", fraktur: "bold-fraktur",
                script: "bold-script", "sans-serif": "bold-sans-serif",
                "sans-serif-italic": "sans-serif-bold-italic" },
        italic: { normal: "italic", bold: "bold-italic",
                  "sans-serif": "sans-serif-italic",
                  "bold-sans-serif": "sans-serif-bold-italic" },
        "bold-italic": { normal: "bold-italic", bold: "bold-italic",
                         italic: "bold-italic",
                         "sans-serif": "sans-serif-bold-italic",
                         "bold-sans-serif": "sans-serif-bold-italic",
                         "sans-serif-italic": "sans-serif-bold-italic" },
        fraktur: { normal: "fraktur", bold: "bold-fraktur" },
        "bold-fraktur": { normal: "bold-fraktur", fraktur: "bold-fraktur" },
        script: { normal: "script", bold: "bold-script" },
        "bold-script": { normal: "bold-script", script: "bold-script" },
        "sans-serif": { normal: "sans-serif", bold: "bold-sans-serif",
                        italic: "sans-serif-italic",
                        "bold-italic": "sans-serif-bold-italic" },
        "bold-sans-serif": { normal: "bold-sans-serif",
                             bold: "bold-sans-serif",
                             "sans-serif": "bold-sans-serif",
                             italic: "sans-serif-bold-italic",
                             "bold-italic": "sans-serif-bold-italic",
                             "sans-serif-italic": "sans-serif-bold-italic" },
        "sans-serif-italic": { normal: "sans-serif-italic",
                               italic: "sans-serif-italic",
                               "sans-serif": "sans-serif-italic",
                               bold: "sans-serif-bold-italic",
                               "bold-italic": "sans-serif-bold-italic",
                               "bold-sans-serif": "sans-serif-bold-italic",
                               "sans-serif-bold": "sans-serif-bold-italic" },
        "sans-serif-bold-italic": {
          normal: "sans-serif-bold-italic",
          italic: "sans-serif-bold-italic",
          "sans-serif": "sans-serif-bold-italic",
          "sans-serif-italic": "sans-serif-bold-italic",
          bold: "sans-serif-bold-italic",
          "bold-italic": "sans-serif-bold-italic",
          "bold-sans-serif": "sans-serif-bold-italic",
          "sans-serif-bold": "sans-serif-bold-italic",
        },
      }.freeze

      # Combines a nested mathvariant with that of an enclosing element.
      #
      # @param inner [String] mathvariant of the nested element
      # @param outer [String] mathvariant of the enclosing element
      # @return [String] the combined variant from MATHVARIANT_OVERRIDE, or
      #   +inner+ unchanged when the table has no entry for the pair
      def mathvariant_override(inner, outer)
        MATHVARIANT_OVERRIDE.dig(outer.to_sym, inner.to_sym) || inner
      end

      # For every element carrying a mathvariant, rewrites the mathvariant of
      # each descendant that also carries one via #mathvariant_override.
      #
      # @param math [Nokogiri::XML::Node]
      # @return [void]
      def mathml_mathvariant(math)
        math.xpath(".//*[@mathvariant]").each do |outer|
          outer.xpath(".//*[@mathvariant]").each do |inner|
            inner["mathvariant"] =
              mathvariant_override(inner["mathvariant"], outer["mathvariant"])
          end
        end
      end

      # Runs the per-stem MathML cleanup steps, in order, on every MathML
      # stem of the document, then gathers UnitsML definitions document-wide.
      #
      # @param xmldoc [Nokogiri::XML::Document]
      # @return [void]
      def mathml_cleanup(xmldoc)
        unitsml = Asciimath2UnitsML::Conv.new(asciimath2unitsml_options)
        xmldoc.xpath("//stem[@type = 'MathML']").each do |x|
          xml_unescape_mathml(x)
          mathml_namespace(x)
          mathml_preserve_space(x)
          unitsml.MathML2UnitsML(x)
          mathml_mathvariant(x)
          mathml_italicise(x)
        end
        mathml_unitsML(xmldoc)
      end
    end
  end
end