lib/html2doc/math.rb in html2doc-1.0.5 vs lib/html2doc/math.rb in html2doc-1.0.6
- old
+ new
@@ -1,9 +1,10 @@
require "uuidtools"
require "asciimath"
require "htmlentities"
require "nokogiri"
+require "plane1converter"
module Html2Doc
@xsltemplate =
Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"),
encoding: "utf-8"))
@@ -23,13 +24,23 @@
a[2].nil? || a[2] = asciimath_to_mathml1(a[2])
a.size > 1 ? a[0] + a[2] : a[0]
end.join
end
# Flattens the second child of accented MathML constructs.
#
# For every element carrying accent="true" (the XPath starts with "//", so
# the whole document is searched), when that element has more than one
# element child and the second element child is an <mrow>, the <mrow> is
# replaced by its own children. Elements with a single child, or whose
# second child is not an <mrow>, are left untouched.
#
# doc     - node/document responding to #xpath (presumably a Nokogiri
#           MathML tree; confirm against callers).
# Returns the same doc object, modified in place.
+ def self.unwrap_accents(doc)
+ doc.xpath("//*[@accent = 'true']").each do |x|
# nothing to unwrap unless there is a second element child
+ x.elements.length > 1 or next
# `and` is used as control flow: replace only when the second child is an mrow
+ x.elements[1].name == "mrow" and
+ x.elements[1].replace(x.elements[1].children)
+ end
+ doc
+ end
+
# random fixes to MathML input that OOXML needs to render properly
#
# Pipeline, innermost call first: mathml_insert_rows, then
# mathml_preserve_space, and (on the "+ new" side of this diff) finally
# unwrap_accents. Afterwards the MathML namespace URI is added to the result
# with a nil prefix, i.e. as a default namespace declaration.
#
# m             - the MathML node to clean up.
# docnamespaces - namespace map handed through to the helper methods.
# Returns the cleaned-up node.
def self.ooxml_cleanup(m, docnamespaces)
- m = mathml_preserve_space(mathml_insert_rows(m, docnamespaces), docnamespaces)
+ m = unwrap_accents(mathml_preserve_space(
+ mathml_insert_rows(m, docnamespaces), docnamespaces))
m.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
m
end
def self.mathml_insert_rows(m, docnamespaces)
@@ -47,22 +58,60 @@
end
m
end
# Applies explicit styling to math runs (<r> elements in the default
# namespace) according to their run properties (rPr): the style child `sty`
# and, on the "+ new" side of this diff, the script child `scr`.
# NOTE(review): element names look like OMML run properties -- confirm that m
# is the OMML output of the XSLT step.
#
# Two families of rules on the new side:
#   1. Runs with a `sty` but NO `scr` are wrapped in an HTML <span> (and <em>
#      for the italic variants) carrying the matching font style/weight.
#   2. Runs with a `scr` have their text rewritten by toPlane1 with a font
#      symbol selected from the scr/sty combination.
#
# m - node responding to #xpath; the queries need both the default namespace
#     (xmlns:) and the m: prefix to be resolvable on it.
# Returns m, modified in place.
def self.unitalic(m)
# sty = 'p': force upright (non-italic) rendering
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'p']]").each do |x|
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
x.wrap("<span style='font-style:normal;'></span>")
end
# sty = 'bi': bold italic; the new side switches to an <em> inside a
# 'nostem' span instead of an inline font-style:italic
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'bi']]").each do |x|
- x.wrap("<span style='font-style:italic;font-weight:bold;'></span>")
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
+ x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
end
# sty = 'i': italic
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'i']]").each do |x|
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
x.wrap("<span class='nostem'><em></em></span>")
end
# sty = 'b': upright bold
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'b']]").each do |x|
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
end
# scr-based rules: monospace and double-struck are converted whatever the sty
+ m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
+ toPlane1(x, :monospace)
+ end
+ m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
+ toPlane1(x, :doublestruck)
+ end
# script and fraktur: plain variant when sty is absent or 'p', bold variant
# when sty is 'b'.
# NOTE(review): no query here matches script/fraktur runs with sty 'i' or
# 'bi', so those runs are neither wrapped nor converted -- confirm intended.
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
+ toPlane1(x, :script)
+ end
+ m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
+ toPlane1(x, :scriptbold)
+ end
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
+ toPlane1(x, :fraktur)
+ end
+ m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
+ toPlane1(x, :frakturbold)
+ end
# sans-serif: all four sty variants (none/'p', 'b', 'i', 'bi') are handled
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
+ toPlane1(x, :sans)
+ end
+ m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
+ toPlane1(x, :sansbold)
+ end
+ m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
+ toPlane1(x, :sansitalic)
+ end
+ m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
+ toPlane1(x, :sansbolditalic)
+ end
m
+ end
+
# Rewrites the text under x using Plane1Converter.
#
# Walks x with #traverse and, for each text node, decodes HTML entities in
# its text, passes the result to Plane1Converter.conv with the given font,
# and replaces the text node with what conv returns. Non-text nodes are
# skipped.
#
# x    - node to process (modified in place).
# font - font selector forwarded to Plane1Converter.conv (callers in this
#        file pass symbols such as :monospace or :sansbold).
# Returns x.
+ def self.toPlane1(x, font)
+ x.traverse do |n|
+ next unless n.text?
# NOTE(review): a new HTMLEntities decoder is built for every text node; it
# looks loop-invariant and could be created once before the traversal.
# NOTE(review): replace receives a String here; Nokogiri documents String
# arguments as markup, so converted text containing '<' or '&' may not
# round-trip -- confirm what Plane1Converter.conv returns.
+ n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font))
+ end
+ x
end
def self.mathml_to_ooml(docxml)
docnamespaces = docxml.collect_namespaces
m = docxml.xpath("//*[local-name() = 'math']")