require "uuidtools" require "nokogiri" module Html2Doc def self.process(result, filename, header_file, dir) docxml = Nokogiri::XML(xhtml(result)) cleanup(docxml, dir) define_head(docxml, dir, filename, header_file) result = self.msword_fix(docxml.to_xml) system "cp #{header_file} #{dir}/header.html" unless header_file.nil? generate_filelist(filename, dir) File.open("#{filename}.htm", "w") { |f| f.write(result) } mime_package result, filename, dir end def self.cleanup(docxml, dir) image_cleanup(docxml, dir) msonormal(docxml) end # preserve HTML escapes def self.xhtml(result) unless //, "") result = "" + result end result end def self.msword_fix(r) # brain damage in MSWord parser r.gsub!(%r{}, '') r.gsub!(%r{(") r.gsub!(%r{  ') r end def self.image_resize(orig_filename) image_size = ImageSize.path(orig_filename).size # max width for Word document is 400, max height is 680 if image_size[0] > 400 image_size[1] = (image_size[1] * 400 / image_size[0]).ceil image_size[0] = 400 end if image_size[1] > 680 image_size[0] = (image_size[0] * 680 / image_size[1]).ceil image_size[1] = 680 end image_size end def self.image_cleanup(docxml, dir) docxml.xpath("//*[local-name() = 'img']").each do |i| matched = /\.(?\S+)$/.match i["src"] uuid = UUIDTools::UUID.random_create.to_s new_full_filename = File.join(dir, "#{uuid}.#{matched[:suffix]}") # presupposes that the image source is local system "cp #{i['src']} #{new_full_filename}" i["width"], i["height"] = image_resize(i["src"]) i["src"] = new_full_filename end docxml end def self.define_head1(docxml, dir) docxml.xpath("//*[local-name() = 'head']").each do |h| h.children.first.add_previous_sibling <<~XML XML end end def self.stylesheet(filename, header_filename) fn = File.join(File.dirname(__FILE__), "wordstyle.css") stylesheet = File.read(fn, encoding: "UTF-8") if header_filename.nil? stylesheet.gsub!(/\n[^\n]*FILENAME[^\n]*i\n/, "\n") else stylesheet.gsub!(/FILENAME/, filename) end xml = Nokogiri::XML("