require "uuidtools" require "asciimath" require "htmlentities" require "nokogiri" require "fileutils" class Html2Doc def initialize(hash) @filename = hash[:filename] @dir = hash[:dir] @dir1 = create_dir(@filename, @dir) @header_file = hash[:header_file] @asciimathdelims = hash[:asciimathdelims] @imagedir = hash[:imagedir] @debug = hash[:debug] @liststyles = hash[:liststyles] @stylesheet = hash[:stylesheet] @xsltemplate = Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"), encoding: "utf-8")) end def process(result) result = process_html(result) process_header(@header_file) generate_filelist(@filename, @dir1) File.open("#{@filename}.htm", "w:UTF-8") { |f| f.write(result) } mime_package result, @filename, @dir1 rm_temp_files(@filename, @dir, @dir1) unless @debug end def process_header(headerfile) return if headerfile.nil? doc = File.read(headerfile, encoding: "utf-8") doc = header_image_cleanup(doc, @dir1, @filename, File.dirname(@filename)) File.open("#{@dir1}/header.html", "w:UTF-8") { |f| f.write(doc) } end def clear_dir(dir) Dir.foreach(dir) do |f| fn = File.join(dir, f) File.delete(fn) if f != "." && f != ".." end dir end def create_dir(filename, dir) dir and return clear_dir(dir) dir = "#{filename}_files" Dir.mkdir(dir) unless File.exists?(dir) clear_dir(dir) end def process_html(result) docxml = to_xhtml(asciimath_to_mathml(result, @asciimathdelims)) define_head(cleanup(docxml)) msword_fix(from_xhtml(docxml)) end def rm_temp_files(filename, dir, dir1) FileUtils.rm "#{filename}.htm" FileUtils.rm_f "#{dir1}/header.html" FileUtils.rm_r dir1 unless dir end def cleanup(docxml) namespace(docxml.root) image_cleanup(docxml, @dir1, @imagedir) mathml_to_ooml(docxml) lists(docxml, @liststyles) footnotes(docxml) bookmarks(docxml) msonormal(docxml) docxml end NOKOHEAD = <<~HERE.freeze HERE def to_xhtml(xml) xml.gsub!(/<\?xml[^>]*>/, "") unless /' + xml end xml = xml.gsub(/") .gsub(//, "") Nokogiri::XML.parse(xml) end DOCTYPE = <<~"DOCTYPE".freeze DOCTYPE def from_xhtml(xml) xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "") .sub(DOCTYPE, "").gsub(%{ />}, "/>") .gsub(//, "/, "") .gsub("\n-->\n", "\n-->\n") end def msword_fix(doc) # brain damage in MSWord parser doc.gsub!(%r{}, "") doc.gsub!(%r{}, '') doc.gsub!(%r{
}, '
') doc.gsub!(%r{(") doc.gsub!(%r{}, "/>") doc.gsub!(%r{>}, "/>") doc.gsub!(%r{>}, "/>") doc.gsub!(%r{>}, "/>") doc.gsub!(%r{>}, "/>") doc.gsub!(%r{>}, "/>") doc.gsub!(%r{>}, "/>") doc.gsub!(%r{<(/)?m:(span|em)\b}, "<\\1\\2") doc.gsub!(%r{&tab;|&tab;}, '  ') doc.split(%r{(|)}).each_slice(4).map do |a| a.size > 2 and a[2] = a[2].gsub(/>\s+<") a end.join end PRINT_VIEW = <<~XML.freeze Print 100 XML def define_head1(docxml, _dir) docxml.xpath("//*[local-name() = 'head']").each do |h| h.children.first.add_previous_sibling <<~XML #{PRINT_VIEW} XML end end def filename_substitute(head, header_filename) return if header_filename.nil? head.xpath(".//*[local-name() = 'style']").each do |s| s1 = s.to_xml.gsub(/url\("[^"]+"\)/) do |m| /FILENAME/.match?(m) ? "url(cid:header.html)" : m end s.replace(s1) end end def stylesheet(_filename, _header_filename, cssname) (cssname.nil? || cssname.empty?) and cssname = File.join(File.dirname(__FILE__), "wordstyle.css") stylesheet = File.read(cssname, encoding: "UTF-8") xml = Nokogiri::XML("