lib/html2doc/math.rb in html2doc-1.0.5 vs lib/html2doc/math.rb in html2doc-1.0.6

- old
+ new

@@ -1,9 +1,10 @@ require "uuidtools" require "asciimath" require "htmlentities" require "nokogiri" +require "plane1converter" module Html2Doc @xsltemplate = Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"), encoding: "utf-8")) @@ -23,13 +24,23 @@ a[2].nil? || a[2] = asciimath_to_mathml1(a[2]) a.size > 1 ? a[0] + a[2] : a[0] end.join end + def self.unwrap_accents(doc) + doc.xpath("//*[@accent = 'true']").each do |x| + x.elements.length > 1 or next + x.elements[1].name == "mrow" and + x.elements[1].replace(x.elements[1].children) + end + doc + end + # random fixes to MathML input that OOXML needs to render properly def self.ooxml_cleanup(m, docnamespaces) - m = mathml_preserve_space(mathml_insert_rows(m, docnamespaces), docnamespaces) + m = unwrap_accents(mathml_preserve_space( + mathml_insert_rows(m, docnamespaces), docnamespaces)) m.add_namespace(nil, "http://www.w3.org/1998/Math/MathML") m end def self.mathml_insert_rows(m, docnamespaces) @@ -47,22 +58,60 @@ end m end def self.unitalic(m) - m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'p']]").each do |x| + m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x| x.wrap("<span style='font-style:normal;'></span>") end - m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'bi']]").each do |x| - x.wrap("<span style='font-style:italic;font-weight:bold;'></span>") + m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x| + x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>") end - m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'i']]").each do |x| + m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x| x.wrap("<span class='nostem'><em></em></span>") end - m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'b']]").each do |x| + m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x| x.wrap("<span style='font-style:normal;font-weight:bold;'></span>") end + m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x| + toPlane1(x, :monospace) + end + m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x| + toPlane1(x, :doublestruck) + end + m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x| + toPlane1(x, :script) + end + m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x| + toPlane1(x, :scriptbold) + end + m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x| + toPlane1(x, :fraktur) + end + m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x| + toPlane1(x, :frakturbold) + end + m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x| + toPlane1(x, :sans) + end + m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x| + toPlane1(x, :sansbold) + end + m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x| + toPlane1(x, :sansitalic) + end + m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x| + toPlane1(x, :sansbolditalic) + end m + end + + def self.toPlane1(x, font) + x.traverse do |n| + next unless n.text? + n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font)) + end + x end def self.mathml_to_ooml(docxml) docnamespaces = docxml.collect_namespaces m = docxml.xpath("//*[local-name() = 'math']")