lib/html2doc/math.rb in html2doc-1.1.1 vs lib/html2doc/math.rb in html2doc-1.1.2
- old
+ new
@@ -23,17 +23,22 @@
# Rewrite the delimited spans of +doc+ by passing each one through
# asciimath_to_mathml1, and return the reassembled string with the
# delimiters removed.
# doc::    the text to scan (a String; it is split on the delimiters)
# delims:: two-element list: opening delimiter, closing delimiter
# Returns +doc+ untouched when +delims+ is nil or has fewer than two entries.
def self.asciimath_to_mathml(doc, delims)
return doc if delims.nil? || delims.size < 2
# The capture group makes split keep the delimiters, so each slice of four is
# [text before, delimiter, delimited content, delimiter]
# (assumes delimiters occur in matched pairs -- TODO confirm with callers)
m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
m.each_slice(4).map.with_index do |(*a), i|
- i % 500 == 0 && m.size > 1000 && i > 0 and
- warn "MathML #{i} of #{(m.size / 4).floor}"
+ progress_conv(i, 500, (m.size / 4).floor, 1000, "AsciiMath")
# a[2] is nil for a trailing slice that has no delimited content
a[2].nil? || a[2] = asciimath_to_mathml1(a[2])
# keep the leading text and the converted content; drop a[1] and a[3]
a.size > 1 ? a[0] + a[2] : a[0]
end.join
end
+ # Report progress of a long-running conversion through +warn+.
+ # idx::       index of the item currently being processed
+ # step::      report only when idx is a multiple of this
+ # total::     number of items; also printed in the message
+ # threshold:: stay silent unless total is greater than this
+ # msg::       label that starts the message
+ # Prints "<msg> <idx> of <total>"; never reports for idx 0.
+ def self.progress_conv(idx, step, total, threshold, msg)
+ return unless (idx % step).zero? && total > threshold && idx.positive?
+
+ warn "#{msg} #{idx} of #{total}"
+ end
+
def self.unwrap_accents(doc)
doc.xpath("//*[@accent = 'true']").each do |x|
x.elements.length > 1 or next
x.elements[1].name == "mrow" and
x.elements[1].replace(x.elements[1].children)
@@ -67,22 +72,24 @@
x.children = x.children.to_xml.gsub(/^\s/, " ").gsub(/\s$/, " ")
end
math
end
+ HTML_NS = 'xmlns="http://www.w3.org/1999/xhtml"'.freeze
+
def self.unitalic(math)
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
- x.wrap("<span style='font-style:normal;'></span>")
+ x.wrap("<span #{HTML_NS} style='font-style:normal;'></span>")
end
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
- x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
+ x.wrap("<span #{HTML_NS} class='nostem' style='font-weight:bold;'><em></em></span>")
end
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
- x.wrap("<span class='nostem'><em></em></span>")
+ x.wrap("<span #{HTML_NS} class='nostem'><em></em></span>")
end
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
- x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
+ x.wrap("<span #{HTML_NS} style='font-style:normal;font-weight:bold;'></span>")
end
math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
to_plane1(x, :monospace)
end
math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
@@ -126,24 +133,34 @@
# Convert every element whose local name is "math" in +docxml+, whatever
# its namespace, replacing each one in place.
# Progress is reported every 100 elements when there are more than 500.
# The removed lines did the conversion inline; the added lines delegate the
# per-element work to mathml_to_ooml1.
def self.mathml_to_ooml(docxml)
# namespaces declared in the document, handed on to ooxml_cleanup
docnamespaces = docxml.collect_namespaces
m = docxml.xpath("//*[local-name() = 'math']")
m.each_with_index do |x, i|
- i % 100 == 0 && m.size > 500 && i > 0 and
- warn "Math OOXML #{i} of #{m.size}"
- element = ooxml_cleanup(x, docnamespaces)
- doc = Nokogiri::XML::Document::new
- doc.root = element
- ooxml = unitalic(esc_space(@xsltemplate.transform(doc))).to_s
- .gsub(/<\?[^>]+>\s*/, "")
- .gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
- .gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
- ooxml = uncenter(x, ooxml)
- x.swap(ooxml)
+ progress_conv(i, 100, m.size, 500, "Math OOXML")
+ mathml_to_ooml1(x, docnamespaces)
end
end
+ # Serialise +xml+ with to_s and clean up the resulting string:
+ # 1. remove <?...?> processing instructions and any whitespace after them;
+ # 2. remove every xmlns / xmlns:prefix declaration;
+ # 3. put an "m:" prefix on every opening or closing tag whose name starts
+ #    with a lowercase letter, except names beginning with "span" or "em".
+ # We need span and em not to be namespaced. Word can't deal with explicit
+ # namespaces.
+ # We will end up stripping them out again under Nokogiri 1.11, which correctly
+ # insists on inheriting namespace from parent.
+ def self.ooml_clean(xml)
+ xml.to_s
+ .gsub(/<\?[^>]+>\s*/, "")
+ .gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
+ .gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
+ end
+
+ # Convert a single math element and swap the result into its place.
+ # xml::           the math node to convert; it is replaced via swap
+ # docnamespaces:: namespaces of the source document, passed to ooxml_cleanup
+ def self.mathml_to_ooml1(xml, docnamespaces)
+ # the cleaned-up element becomes the root of a fresh document for the XSLT
+ doc = Nokogiri::XML::Document::new
+ doc.root = ooxml_cleanup(xml, docnamespaces)
+ # transform, then esc_space, unitalic and ooml_clean, in that order
+ ooxml = ooml_clean(unitalic(esc_space(@xsltemplate.transform(doc))))
+ # uncenter needs the original node to inspect its ancestors and siblings
+ ooxml = uncenter(xml, ooxml)
+ xml.swap(ooxml)
+ end
+
# escape space as &#x32;; we are removing any spaces generated by
# XML indentation
def self.esc_space(xml)
xml.traverse do |n|
next unless n.text?
@@ -155,10 +172,10 @@
# if the OOXML math has no siblings, it is centered by default; override this
# with left/right if the parent is so tagged
def self.uncenter(math, ooxml)
alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
- "local-name() = 'div' or local-name() = 'td']/@style")
+ "local-name() = 'div' or local-name() = 'td']/@style")
return ooxml unless alignnode && (math.next == nil && math.previous == nil)
%w(left right).each do |dir|
if alignnode.text.include? ("text-align:#{dir}")
ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\