lib/html2doc/mime.rb in html2doc-0.0.1 vs lib/html2doc/mime.rb in html2doc-0.5.0

- old
+ new

@@ -1,127 +1,9 @@ require "uuidtools" require "nokogiri" module Html2Doc - def self.process(result, filename, header_file, dir) - docxml = Nokogiri::XML(xhtml(result)) - cleanup(docxml, dir) - define_head(docxml, dir, filename, header_file) - result = self.msword_fix(docxml.to_xml) - system "cp #{header_file} #{dir}/header.html" unless header_file.nil? - generate_filelist(filename, dir) - File.open("#{filename}.htm", "w") { |f| f.write(result) } - mime_package result, filename, dir - end - - def self.cleanup(docxml, dir) - image_cleanup(docxml, dir) - msonormal(docxml) - end - - # preserve HTML escapes - def self.xhtml(result) - unless /<!DOCTYPE html/.match? result - result.gsub!(/<\?xml version="1.0"\?>/, "") - result = "<!DOCTYPE html SYSTEM " + - "'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>" + result - end - result - end - - def self.msword_fix(r) - # brain damage in MSWord parser - r.gsub!(%r{<span style="mso-special-character:footnote"/>}, - '<span style="mso-special-character:footnote"></span>') - r.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>") - r.gsub!(%r{<link rel="File-List"}, "<link rel=File-List") - r.gsub!(%r{<meta http-equiv="Content-Type"}, - "<meta http-equiv=Content-Type") - r.gsub!(%r{&tab;|&amp;tab;}, - '<span style="mso-tab-count:1">&#xA0; </span>') - r - end - - def self.image_resize(orig_filename) - image_size = ImageSize.path(orig_filename).size - # max width for Word document is 400, max height is 680 - if image_size[0] > 400 - image_size[1] = (image_size[1] * 400 / image_size[0]).ceil - image_size[0] = 400 - end - if image_size[1] > 680 - image_size[0] = (image_size[0] * 680 / image_size[1]).ceil - image_size[1] = 680 - end - image_size - end - - def self.image_cleanup(docxml, dir) - docxml.xpath("//*[local-name() = 'img']").each do |i| - matched = /\.(?<suffix>\S+)$/.match i["src"] - uuid = UUIDTools::UUID.random_create.to_s - new_full_filename = File.join(dir, "#{uuid}.#{matched[:suffix]}") - # presupposes that the image source is local - system "cp #{i['src']} #{new_full_filename}" - i["width"], i["height"] = image_resize(i["src"]) - i["src"] = new_full_filename - end - docxml - end - - def self.define_head1(docxml, dir) - docxml.xpath("//*[local-name() = 'head']").each do |h| - h.children.first.add_previous_sibling <<~XML - <!--[if gte mso 9]> - <xml> - <w:WordDocument> - <w:View>Print</w:View> - <w:Zoom>100</w:Zoom> - <w:DoNotOptimizeForBrowser/> - </w:WordDocument> - </xml> - <![endif]--> - <meta http-equiv=Content-Type content="text/html; charset=utf-8"/> - <link rel="File-List" href="#{dir}/filelist.xml"/> - XML - end - end - - def self.stylesheet(filename, header_filename) - fn = File.join(File.dirname(__FILE__), "wordstyle.css") - stylesheet = File.read(fn, encoding: "UTF-8") - if header_filename.nil? - stylesheet.gsub!(/\n[^\n]*FILENAME[^\n]*i\n/, "\n") - else - stylesheet.gsub!(/FILENAME/, filename) - end - xml = Nokogiri::XML("<style/>") - xml.children.first << Nokogiri::XML::Comment.new(xml, "\n#{stylesheet}\n") - xml.root.to_s - end - - def self.define_head(docxml, dir, filename, header_file) - title = docxml.at("//*[local-name() = 'head']/*[local-name() = 'title']") - head = docxml.at("//*[local-name() = 'head']") - if title.nil? - head.children.first.add_previous_sibling stylesheet(filename, header_file) - else - title.add_next_sibling stylesheet(filename, header_file) - end - define_head1(docxml, dir) - namespace(docxml.root) - end - - def self.namespace(root) - { - o: "urn:schemas-microsoft-com:office:office", - w: "urn:schemas-microsoft-com:office:word", - m: "http://schemas.microsoft.com/office/2004/12/omml", - }.each { |k, v| root.add_namespace_definition(k.to_s, v) } - root.add_namespace(nil, "http://www.w3.org/TR/REC-html40") - end - def self.mime_preamble(boundary, filename, result) <<~"PREAMBLE" MIME-Version: 1.0 Content-Type: multipart/related; boundary="#{boundary}" @@ -168,30 +50,7 @@ next if item == "." || item == ".." || /^\./.match(item) mhtml += mime_attachment(boundary, filename, item, dir) end mhtml += "--#{boundary}--" File.open("#{filename}.doc", "w") { |f| f.write mhtml } - end - - def self.generate_filelist(filename, dir) - File.open(File.join(dir, "filelist.xml"), "w") do |f| - f.write(<<~"XML") - <xml xmlns:o="urn:schemas-microsoft-com:office:office"> - <o:MainFile HRef="../#{filename}.htm"/> - XML - Dir.foreach(dir) do |item| - next if item == "." || item == ".." || /^\./.match(item) - f.write %{ <o:File HRef="#{item}"/>\n} - end - f.write("</xml>\n") - end - end - - def self.msonormal(docxml) - docxml.xpath("//*[local-name() = 'p'][not(self::*[@class])]").each do |p| - p["class"] = "MsoNormal" - end - docxml.xpath("//*[local-name() = 'li'][not(self::*[@class])]").each do |p| - p["class"] = "MsoNormal" - end end end