require "uuidtools" require "asciimath" require "htmlentities" require "nokogiri" #require "xml/xslt" require "pp" require "fileutils" module Html2Doc def self.process(result, hash) hash[:dir1] = create_dir(hash[:filename], hash[:dir]) result = process_html(result, hash) process_header(hash[:header_file], hash) generate_filelist(hash[:filename], hash[:dir1]) File.open("#{hash[:filename]}.htm", "w:UTF-8") { |f| f.write(result) } mime_package result, hash[:filename], hash[:dir1] rm_temp_files(hash[:filename], hash[:dir], hash[:dir1]) unless hash[:debug] end def self.process_header(headerfile, hash) return if headerfile.nil? doc = File.read(headerfile, encoding: "utf-8") doc = header_image_cleanup(doc, hash[:dir1], hash[:filename], File.dirname(hash[:filename])) File.open("#{hash[:dir1]}/header.html", "w:UTF-8") { |f| f.write(doc) } end def self.create_dir(filename, dir) return dir if dir dir = "#{filename}_files" Dir.mkdir(dir) unless File.exists?(dir) dir end def self.process_html(result, hash) docxml = to_xhtml(asciimath_to_mathml(result, hash[:asciimathdelims])) define_head(cleanup(docxml, hash), hash) msword_fix(from_xhtml(docxml)) end def self.rm_temp_files(filename, dir, dir1) FileUtils.rm "#{filename}.htm" FileUtils.rm_f "#{dir1}/header.html" FileUtils.rm_r dir1 unless dir end def self.cleanup(docxml, hash) namespace(docxml.root) image_cleanup(docxml, hash[:dir1], File.dirname(hash[:filename])) mathml_to_ooml(docxml) lists(docxml, hash[:liststyles]) footnotes(docxml) bookmarks(docxml) msonormal(docxml) docxml end NOKOHEAD = <<~HERE.freeze HERE def self.to_xhtml(xml) xml.gsub!(/<\?xml[^>]*>/, "") unless /' + xml end Nokogiri::XML.parse(xml) end DOCTYPE = <<~"DOCTYPE".freeze DOCTYPE def self.from_xhtml(xml) xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, ""). sub(DOCTYPE, ""). gsub(%{ />}, "/>") end def self.msword_fix(r) # brain damage in MSWord parser r.gsub!(%r{}, '') r.gsub!(%r{
}, '
') r.gsub!(%r{(") r.gsub!(%r{}, "/>") r.gsub!(%r{>}, "/>") r.gsub!(%r{>}, "/>") r.gsub!(%r{>}, "/>") r.gsub!(%r{>}, "/>") r.gsub!(%r{>}, "/>") r.gsub!(%r{>}, "/>") r.gsub!(%r{&tab;|&tab;}, '  ') r = r.split(%r{(|)}).each_slice(4).map do |a| a.size > 2 and a[2] = a[2].gsub(/>\s+<") a end.join r end PRINT_VIEW = <<~XML.freeze XML def self.define_head1(docxml, dir) docxml.xpath("//*[local-name() = 'head']").each do |h| h.children.first.add_previous_sibling <<~XML #{PRINT_VIEW} XML end end def self.filename_substitute(stylesheet, header_filename, filename) if header_filename.nil? stylesheet.gsub!(/\n[^\n]*FILENAME[^\n]*i\n/, "\n") else stylesheet.gsub!(/FILENAME/, File.basename(filename)) end stylesheet end def self.stylesheet(filename, header_filename, fn) (fn.nil? || fn.empty?) && fn = File.join(File.dirname(__FILE__), "wordstyle.css") stylesheet = File.read(fn, encoding: "UTF-8") stylesheet = filename_substitute(stylesheet, header_filename, filename) xml = Nokogiri::XML("