lib/html2doc/math.rb in html2doc-0.9.0 vs lib/html2doc/math.rb in html2doc-0.9.1
- old
+ new
@@ -2,58 +2,61 @@
require "asciimath"
require "htmlentities"
require "nokogiri"
module Html2Doc
- @xsltemplate = Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"), encoding: "utf-8"))
+ @xsltemplate =
+ Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"),
+ encoding: "utf-8"))
# Convert a single AsciiMath expression to a MathML string.
# HTML entities in x are decoded before the expression is parsed, and every
# bare <math> tag in the generated markup gets the MathML namespace declaration.
def self.asciimath_to_mathml1(x)
  plain = HTMLEntities.new.decode(x)
  mathml = AsciiMath.parse(plain).to_mathml
  mathml.gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
end
def self.asciimath_to_mathml(doc, delims)
return doc if delims.nil? || delims.size < 2
m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
m.each_slice(4).map.with_index do |(*a), i|
- warn "MathML #{i} of #{(m.size / 4).floor}" if i % 500 == 0 && m.size > 1000 && i > 0
+ i % 500 == 0 && m.size > 1000 && i > 0 and
+ warn "MathML #{i} of #{(m.size / 4).floor}"
a[2].nil? || a[2] = asciimath_to_mathml1(a[2])
a.size > 1 ? a[0] + a[2] : a[0]
end.join
end
# random fixes to MathML input that OOXML needs to render properly
def self.ooxml_cleanup(m, docnamespaces)
- m.xpath(".//xmlns:msup[name(preceding-sibling::*[1])='munderover']",
- docnamespaces).each do |x|
- x1 = x.replace("<mrow></mrow>").first
- x1.children = x
+ m.xpath(%w(msup msub msubsup munder mover munderover).
+ map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
+ next unless x.next_element && x.next_element != "mrow"
+ x.next_element.wrap("<mrow/>")
end
m.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
m
end
def self.mathml_to_ooml(docxml)
docnamespaces = docxml.collect_namespaces
m = docxml.xpath("//*[local-name() = 'math']")
m.each_with_index do |x, i|
- warn "Math OOXML #{i} of #{m.size}" if i % 100 == 0 && m.size > 500 && i > 0
+ i % 100 == 0 && m.size > 500 && i > 0 and
+ warn "Math OOXML #{i} of #{m.size}"
element = ooxml_cleanup(x, docnamespaces)
-
doc = Nokogiri::XML::Document::new()
doc.root = element
-
- ooxml = @xsltemplate.transform(doc).to_s.
+ ooxml = (esc_space(@xsltemplate.transform(doc))).to_s.
gsub(/<\?[^>]+>\s*/, "").
gsub(/ xmlns(:[^=]+)?="[^"]+"/, "").
gsub(%r{<(/)?([a-z])}, "<\\1m:\\2")
- ooxml = uncenter(esc_space(x), ooxml)
+ ooxml = uncenter(x, ooxml)
x.swap(ooxml)
end
end
- # escape space as 2; we are removing any spaces generated by XML indentation
+ # escape space as 2; we are removing any spaces generated by
+ # XML indentation
def self.esc_space(xml)
xml.traverse do |n|
next unless n.text?
n = n.text.gsub(/ /, "2")
end
@@ -62,11 +65,12 @@
# if the OMML has no siblings, it is centered by default; override this with
# left/right alignment if an enclosing p, div or td is styled that way
def self.uncenter(m, ooxml)
if m.next == nil && m.previous == nil
- alignnode = m.at(".//ancestor::*[@style][local-name() = 'p' or local-name() = "\
- "'div' or local-name() = 'td']/@style") or return ooxml
+ alignnode = m.at(".//ancestor::*[@style][local-name() = 'p' or "\
+ "local-name() = 'div' or local-name() = 'td']/@style")
+ return ooxml unless alignnode
if alignnode.text.include? ("text-align:left")
ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
"m:val='left'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
elsif alignnode.text.include? ("text-align:right")
ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\