require "fileutils"
require "tempfile"
require_relative "./postprocess_cover.rb"

module IsoDoc::WordFunction
  # Word-specific post-processing of the rendered XHTML: a sequence of DOM
  # clean-up passes followed by the hand-off to Html2Doc.process.
  module Postprocess
    # add namespaces for Word fragments
    # NOTE(review): the heredoc body is empty in the text under review; the
    # comment above and the epub:-prefixed XPath in word_footnote_format
    # suggest it once held an XHTML skeleton declaring namespaces. Restore it
    # from version control before relying on to_word_xhtml_fragment.
    WORD_NOKOHEAD = <<~HERE.freeze
    HERE

    # Parses +xml+ as a document fragment in the context of the root of a
    # document built from WORD_NOKOHEAD, and returns the fragment.
    def to_word_xhtml_fragment(xml)
      doc = ::Nokogiri::XML.parse(WORD_NOKOHEAD)
      ::Nokogiri::XML::DocumentFragment.new(doc, xml, doc.root)
    end

    # After the inherited clean-up, gives every class-less paragraph that has
    # an ancestor of class "Note" the class "Note".
    def table_note_cleanup(docxml)
      super
      # preempt html2doc putting MsoNormal there
      docxml.xpath("//p[not(self::*[@class])]"\
                   "[ancestor::*[@class = 'Note']]").each do |p|
        p["class"] = "Note"
      end
    end

    # Entry point: strips a trailing ".doc" from +filename+, generates the
    # header, runs the generic clean-up, converts to Word via toWord, and
    # finally removes every path listed in @files_to_delete.
    def postprocess(result, filename, dir)
      filename = filename.sub(/\.doc$/, "")
      header = generate_header(filename, dir)
      result = from_xhtml(cleanup(to_xhtml(textcleanup(result))))
      toWord(result, filename, dir, header)
      @files_to_delete.each { |f| FileUtils.rm_f f }
    end

    # Runs the Word clean-up passes over +result+, adds the page-orientation
    # CSS collected in @landscapestyle to the Word stylesheet, and passes
    # everything to Html2Doc.process. The header file and the stylesheet are
    # unlinked afterwards.
    #
    # +header+ may be nil; otherwise it must respond to #path and #unlink.
    def toWord(result, filename, dir, header)
      result = from_xhtml(word_cleanup(to_xhtml(result)))
      unless @landscapestyle.empty? || @wordstylesheet.nil?
        # Append by path instead of reopening the object and writing:
        # Tempfile#open reopens read/write positioned at the start of the
        # file, so a write there overwrites the head of the stylesheet.
        # NOTE(review): assumes @wordstylesheet is Tempfile-like (it is used
        # here through #open/#close/#path/#unlink only) - confirm.
        @wordstylesheet.close
        File.open(@wordstylesheet.path, "a:UTF-8") do |f|
          f.write(@landscapestyle)
        end
      end
      Html2Doc.process(result, filename: filename,
                               stylesheet: @wordstylesheet&.path,
                               header_file: header&.path, dir: dir,
                               asciimathdelims: [@openmathdelim,
                                                 @closemathdelim],
                               liststyles: { ul: @ulstyle, ol: @olstyle })
      header&.unlink
      @wordstylesheet&.unlink
    end

    # Sets width and height on every image inside an Admonition div from
    # Html2Doc.image_resize, passing @maxheight and 300 as the size limits.
    def word_admonition_images(docxml)
      docxml.xpath("//div[@class = 'Admonition']//img").each do |i|
        i["width"], i["height"] =
          Html2Doc.image_resize(i, image_localfile(i), @maxheight, 300)
      end
    end

    # Applies the Word clean-up passes in a fixed order and returns +docxml+.
    # word_section_breaks is the pass that (re)initialises @landscapestyle.
    def word_cleanup(docxml)
      word_annex_cleanup(docxml)
      word_preface(docxml)
      word_nested_tables(docxml)
      word_table_align(docxml)
      word_table_separator(docxml)
      word_admonition_images(docxml)
      word_list_continuations(docxml)
      word_example_cleanup(docxml)
      word_pseudocode_cleanup(docxml)
      word_image_caption(docxml)
      word_section_breaks(docxml)
      authority_cleanup(docxml)
      word_footnote_format(docxml)
      docxml
    end

    # Moves every table nested inside another table out of it, placing it
    # directly after the outer table. Iterating in reverse while always
    # inserting right after the outer table keeps the original order.
    def word_nested_tables(docxml)
      docxml.xpath("//table").each do |t|
        t.xpath(".//table").reverse.each do |tt|
          t.next = tt.remove
        end
      end
    end

    # Moves the boilerplate div for +klass+ (matched by id or class
    # "boilerplate-<klass>") into the place of the div with id
    # "boilerplate-<klass>-destination". Empty h1/h2 are dropped and the
    # remaining h1/h2 become p.TitlePageSubhead. Headings are rewritten even
    # when no destination exists; the move needs both nodes.
    def authority_cleanup1(docxml, klass)
      dest = docxml.at("//div[@id = 'boilerplate-#{klass}-destination']")
      auth = docxml.at("//div[@id = 'boilerplate-#{klass}' or "\
                       "@class = 'boilerplate-#{klass}']")
      auth&.xpath(".//h1[not(text())] | .//h2[not(text())]")&.each(&:remove)
      auth&.xpath(".//h1 | .//h2")&.each do |h|
        h.name = "p"
        h["class"] = "TitlePageSubhead"
      end
      dest.replace(auth.remove) if dest && auth
    end

    # Runs authority_cleanup1 for each of the four boilerplate kinds.
    def authority_cleanup(docxml)
      %w(copyright license legal feedback).each do |t|
        authority_cleanup1(docxml, t)
      end
    end

    # Appends +css+ to the style attribute of +node+, inserting a ";"
    # separator (and absorbing one trailing ";" if present). Sets the
    # attribute outright when the node has none. No-op for a nil node.
    def style_update(node, css)
      return unless node
      node["style"] =
        node["style"] ? node["style"].sub(/;?$/, ";#{css}") : css
    end

    # For each FigureTitle/SourceTitle paragraph: if the element right before
    # it is an img, replaces that img with the markup below; then adds
    # "page-break-after:avoid;" to whatever element now precedes the title.
    def word_image_caption(docxml)
      docxml.xpath("//p[@class = 'FigureTitle' or @class = 'SourceTitle']").
        each do |t|
        if t.previous_element&.name == "img"
          img = t.previous_element
          # NOTE(review): only newlines surround the image in the text under
          # review; this literal looks like it lost wrapper markup around
          # the img. Restore from version control.
          img.swap("\n\n#{img.to_xml}\n\n")
        end
        style_update(t.previous_element, "page-break-after:avoid;")
      end
    end

    # Starts list_add at level 1 for every ul and every ol that is not
    # nested inside another list.
    def word_list_continuations(docxml)
      list_add(docxml.xpath("//ul[not(ancestor::ul) and not(ancestor::ol)]"), 1)
      list_add(docxml.xpath("//ol[not(ancestor::ul) and not(ancestor::ol)]"), 1)
    end

    # For each list in +xpath+, visits its own items (items of nested lists
    # are excluded), wraps every p/div child after the first, and recurses
    # into the directly nested ul/ol with +lvl+ + 1.
    def list_add(xpath, lvl)
      xpath.each do |list|
        (list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")).each do |l|
          l.xpath("./p | ./div").each_with_index do |p, i|
            next if i.zero?
            # NOTE(review): the wrapper literal is a bare newline in the text
            # under review. +lvl+ is not used anywhere else in this method,
            # so the lost markup presumably interpolated it. Restore from
            # version control.
            p.wrap(%{\n})
          end
          list_add(l.xpath(".//ul") - l.xpath(".//ul//ul | .//ol//ul"), lvl + 1)
          list_add(l.xpath(".//ol") - l.xpath(".//ul//ol | .//ol//ol"), lvl + 1)
        end
      end
    end

    # Copies the align attribute of a td/th onto its direct p children as a
    # text-align style, skipping paragraphs that carry their own align.
    def word_table_align(docxml)
      docxml.xpath("//td[@align]/p | //th[@align]/p").each do |p|
        next if p["align"]
        style_update(p, "text-align: #{p.parent["align"]}")
      end
    end

=begin
    NOTE(review): disabled code, kept for reference. The EMPTY_PARA literal
    below has lost its markup in the text under review.

    EMPTY_PARA = "

"\
      " 

"

    def table_after_table(docxml)
      docxml.xpath("//table[following-sibling::*[1]/self::table]").each do |t|
        t.add_next_sibling(EMPTY_PARA)
      end
    end
=end

    # Turns every empty TableTitle paragraph into a zero-height separator:
    # font-size 0pt plus a single-character body.
    def word_table_separator(docxml)
      docxml.xpath("//p[@class = 'TableTitle']").each do |t|
        next unless t.children.empty?
        # style_update yields the same value as the former inline sub when a
        # style exists, and no longer raises when the attribute is absent.
        style_update(t, "font-size:0pt;")
        # NOTE(review): confirm whether this filler should be a
        # non-breaking space rather than a plain one.
        t.children = " "
      end
    end

    # Does nothing in this module; called first by word_cleanup.
    def word_annex_cleanup(docxml)
    end

    # Class-less paragraphs inside div.example get the class "example".
    def word_example_cleanup(docxml)
      docxml.xpath("//div[@class = 'example']//p[not(@class)]").each do |p|
        p["class"] = "example"
      end
    end

    # Class-less paragraphs inside div.pseudocode get the class "pseudocode".
    def word_pseudocode_cleanup(docxml)
      docxml.xpath("//div[@class = 'pseudocode']//p[not(@class)]").each do |p|
        p["class"] = "pseudocode"
      end
    end

    # Renders the Liquid template at @header with the document metadata
    # (keys stringified, plus :filename) into a temporary file and returns
    # that Tempfile. Returns nil when @header is not set.
    def generate_header(filename, _dir)
      return nil unless @header
      template = IsoDoc::Common.liquid(File.read(@header, encoding: "UTF-8"))
      meta = @meta.get
      meta[:filename] = filename
      params = meta.map { |k, v| [k.to_s, v] }.to_h
      # The block form closes the file on exit; the Tempfile object itself is
      # the block value, so callers still get #path and #unlink.
      Tempfile.open(%w(header html), encoding: "utf-8") do |f|
        f.write(template.render(params))
        f
      end
    end

    # Resets @landscapestyle, splits WordSection2 and WordSection3 at every
    # br carrying an orientation attribute, removes page breaks that open the
    # resulting sections, and finally strips the orientation attributes.
    def word_section_breaks(docxml)
      @landscapestyle = ""
      word_section_breaks1(docxml, "WordSection2")
      word_section_breaks1(docxml, "WordSection3")
      word_remove_pb_before_annex(docxml)
      docxml.xpath("//br[@orientation]").each { |br| br.delete("orientation") }
    end

    # Walks the oriented breaks of section +sect+ from last to first. For the
    # i-th break (counted in that reversed order) it records a CSS rule
    # mapping div.<sect>_<i> to the page <sect>L (landscape) or <sect>P
    # (anything else), then splits the section at the break.
    def word_section_breaks1(docxml, sect)
      docxml.xpath("//div[@class = '#{sect}']//br[@orientation]").reverse.
        each_with_index do |br, i|
        @landscapestyle += "\ndiv.#{sect}_#{i} {page:#{sect}"\
          "#{br["orientation"] == "landscape" ? "L" : "P"};}\n"
        split_at_section_break(docxml, sect, br, i)
      end
    end

    # Moves everything that follows the break's parent, and is an element
    # inside a div of class +sect+, into a new div of class <sect>_<i>
    # inserted right after the first div of class +sect+.
    def split_at_section_break(docxml, sect, br, i)
      move = br.parent.xpath("following::node()") &
        br.document.xpath("//div[@class = '#{sect}']//*")
      # The inserted element has to be a div of class <sect>_<i>: the guard
      # below tests for exactly that ancestor, and word_section_breaks1 emits
      # the CSS rule for the same selector. The literal had been reduced to
      # a bare newline.
      ins = docxml.at("//div[@class = '#{sect}']").
        after("<div class='#{sect}_#{i}'/>").next_element
      move.each do |m|
        # Already carried over together with an ancestor that was moved.
        next if m.at("./ancestor::div[@class = '#{sect}_#{i}']")
        ins << m.remove
      end
    end

    # Applies to divs whose class has the form WordSectionN_M and whose first
    # element is a p whose first element is a br with the page-break style
    # below: that leading paragraph is removed.
    def word_remove_pb_before_annex(docxml)
      docxml.xpath("//div[p/br]").each do |d|
        /^WordSection\d+_\d+$/.match(d["class"]) or next
        first = d.elements[0]
        first.name == "p" && !first.elements.empty? or next
        br = first.elements[0]
        br.name == "br" && br["style"] ==
          "mso-special-character:line-break;page-break-before:always" or next
        first.remove
      end
    end

    # Reformats footnote references for Word and returns +docxml+.
    def word_footnote_format(docxml)
      # the content is in a[@epub:type = 'footnote']//sup, but in Word,
      # we need to inject content around the autonumbered footnote reference
      docxml.xpath("//a[@epub:type = 'footnote']").each do |x|
        footnote_reference_format(x)
      end
      docxml.xpath("//a[@class = 'TableFootnoteRef'] | "\
                   "//span[@class = 'TableFootnoteRef']").each do |x|
        table_footnote_reference_format(x)
      end
      docxml
    end
  end
end