html2doc_spec.rb in html2doc-1.1.0

- old
+ new

@@ -34,11 +34,12 @@
 WORD_HDR = <<~HDR
 MIME-Version: 1.0
 Content-Type: multipart/related; boundary="----=_NextPart_"
 
 ------=_NextPart_
-Content-Location: file:///C:/Doc/test.htm
+Content-ID: <test.htm>
+Content-Disposition: inline; filename="test.htm"
 Content-Type: text/html; charset="utf-8"
 
 <?xml version="1.0"?>
 <html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head><!--[if gte mso 9]>
 <xml>
@@ -49,11 +50,11 @@
 </w:WordDocument>
 </xml>
 <![endif]-->
 <meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
 
-  <link rel=File-List href="test_files/filelist.xml"/>
+  <link rel=File-List href="cid:filelist.xml"/>
 <title>blank</title><style><![CDATA[
   <!--
 HDR
 
 WORD_HDR_END = <<~HDR
@@ -71,11 +72,12 @@
   BODY
 end
 
 WORD_FTR1 = <<~FTR
   ------=_NextPart_
-Content-Location: file:///C:/Doc/test_files/filelist.xml
+Content-ID: <filelist.xml>
+Content-Disposition: inline; filename="filelist.xml"
 Content-Transfer-Encoding: base64
 Content-Type: #{Html2Doc::mime_type('filelist.xml')}
 
 PHhtbCB4bWxuczpvPSJ1cm46c2NoZW1hcy1taWNyb3NvZnQtY29tOm9mZmljZTpvZmZpY2UiPgog
 ICAgICAgIDxvOk1haW5GaWxlIEhSZWY9Ii4uL3Rlc3QuaHRtIi8+ICA8bzpGaWxlIEhSZWY9ImZp
@@ -84,31 +86,34 @@
 ------=_NextPart_--
 FTR
 
 WORD_FTR2 = <<~FTR
   ------=_NextPart_
-Content-Location: file:///C:/Doc/test_files/filelist.xml
+Content-ID: <filelist.xml>
+Content-Disposition: inline; filename="filelist.xml"
 Content-Transfer-Encoding: base64
 Content-Type: #{Html2Doc::mime_type('filelist.xml')}
 PHhtbCB4bWxuczpvPSJ1cm46c2NoZW1hcy1taWNyb3NvZnQtY29tOm9mZmljZTpvZmZpY2UiPgog
 ICAgICAgIDxvOk1haW5GaWxlIEhSZWY9Ii4uL3Rlc3QuaHRtIi8+ICA8bzpGaWxlIEhSZWY9ImZp
 bGVsaXN0LnhtbCIvPgogIDxvOkZpbGUgSFJlZj0iaGVhZGVyLmh0bWwiLz4KPC94bWw+Cg==
 ------=_NextPart_
 FTR
 
 WORD_FTR3 = <<~FTR
 ------=_NextPart_
-Content-Location: file:///C:/Doc/test_files/filelist.xml
+Content-ID: <filelist.xml>
+Content-Disposition: inline; filename="filelist.xml"
 Content-Transfer-Encoding: base64
 Content-Type: #{Html2Doc::mime_type('filelist.xml')}
 
 PHhtbCB4bWxuczpvPSJ1cm46c2NoZW1hcy1taWNyb3NvZnQtY29tOm9mZmljZTpvZmZpY2UiPgog
 ICAgICAgIDxvOk1haW5GaWxlIEhSZWY9Ii4uL3Rlc3QuaHRtIi8+ICA8bzpGaWxlIEhSZWY9IjFh
 YzIwNjVmLTAzZjAtNGM3YS1iOWE2LTkyZTgyMDU5MWJmMC5wbmciLz4KICA8bzpGaWxlIEhSZWY9
 ImZpbGVsaXN0LnhtbCIvPgo8L3htbD4K
 ------=_NextPart_
-Content-Location: file:///C:/Doc/test_files/cb7b0d19-891e-4634-815a-570d019d454c.png
+Content-ID: <cb7b0d19-891e-4634-815a-570d019d454c.png>
+Content-Disposition: inline; filename="cb7b0d19-891e-4634-815a-570d019d454c.png"
 Content-Transfer-Encoding: base64
 Content-Type: image/png
 ------=_NextPart_--
 FTR
 
@@ -327,33 +332,33 @@
                            sub(%r{------=_NextPart_--.*$}m, "")).force_encoding("UTF-8")
     #expect(hdr.gsub(/\xa0/, " ")).to match_fuzzy(HEADERHTML)
     expect(HTMLEntities.new.encode(hdr, :hexadecimal).
            gsub(/\&#x3c;/, "<").gsub(/\&#x3e;/, ">").gsub(/\&#x27;/, "'").gsub(/\&#x22;/, '"').
            gsub(/\&#xd;/, "&#xa;").gsub(/\&#xa;/, "\n")).to match_fuzzy(HEADERHTML)
-    expect(html.sub(%r{Content-Location: file:///C:/Doc/test_files/header.html.*$}m, "")).
+    expect(html.sub(%r{Content-ID: <header.html>.*$}m, "")).
            to match_fuzzy(<<~OUTPUT)
-    #{WORD_HDR} #{DEFAULT_STYLESHEET.gsub(/FILENAME/, "test")} 
+    #{WORD_HDR} #{DEFAULT_STYLESHEET.gsub(/url\("[^"]+"\)/, "url(cid:header.html)")} 
     #{WORD_HDR_END} #{word_body("", '<div style="mso-element:footnote-list"/>')} #{WORD_FTR2}
     OUTPUT
   end
 
   it "processes a header with an image" do
     Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img.html")
     doc = guid_clean(File.read("test.doc", encoding: "utf-8"))
     expect(doc).to match(%r{Content-Type: image/png})
-    expect(doc).to match(%r{file:///C:/Doc/test_files/[^.]+\.png})
+    expect(doc).to match(%r{iVBORw0KGgoAAAANSUhEUgAAA5cAAAN7CAYAAADRE24cAAAgAElEQVR4XuydB5gUxdaGC65gTogB})
   end
 
   it "processes a header with an image with absolute path" do
     doc = File.read("spec/header_img.html", encoding: "utf-8")
     File.open("spec/header_img1.html", "w:UTF-8") do |f|
       f.write doc.sub(%r{spec/19160-6.png}, File.expand_path(File.join(File.dirname(__FILE__), "19160-6.png")))
     end
     Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img1.html")
     doc = guid_clean(File.read("test.doc", encoding: "utf-8"))
     expect(doc).to match(%r{Content-Type: image/png})
-    expect(doc).to match(%r{file:///C:/Doc/test_files/[^.]+\.png})
+    expect(doc).to match(%r{iVBORw0KGgoAAAANSUhEUgAAA5cAAAN7CAYAAADRE24cAAAgAElEQVR4XuydB5gUxdaGC65gTogB})
   end
 
 
   it "processes a populated document" do
     simple_body = "<h1>Hello word!</h1>
@@ -533,11 +538,11 @@
     Html2Doc.process(html_input(simple_body), filename: "spec/test")
     testdoc = File.read("spec/test.doc", encoding: "utf-8")
     expect(testdoc).to match(%r{Content-Type: image/png})
     expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
     #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
-    #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.png" width="400" height="388"></img>', '<div style="mso-element:footnote-list"/>'))}
+    #{image_clean(word_body('<img src="cid:cb7b0d19-891e-4634-815a-570d019d454c.png" width="400" height="388"></img>', '<div style="mso-element:footnote-list"/>'))}
     #{image_clean(WORD_FTR3)}
     OUTPUT
   end
 
   it "resizes images for width" do
@@ -545,11 +550,11 @@
     Html2Doc.process(html_input(simple_body), filename: "test")
     testdoc = File.read("test.doc", encoding: "utf-8")
     expect(testdoc).to match(%r{Content-Type: image/gif})
     expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
     #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
-    #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.gif" width="400" height="118"></img>', '<div style="mso-element:footnote-list"/>'))}
+    #{image_clean(word_body('<img src="cid:cb7b0d19-891e-4634-815a-570d019d454c.gif" width="400" height="118"></img>', '<div style="mso-element:footnote-list"/>'))}
     #{image_clean(WORD_FTR3).gsub(/image\.png/, "image.gif")}
     OUTPUT
   end
 
   it "resizes images for height" do
@@ -557,11 +562,11 @@
     Html2Doc.process(html_input(simple_body), filename: "test")
     testdoc = File.read("test.doc", encoding: "utf-8")
     expect(testdoc).to match(%r{Content-Type: image/jpeg})
     expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
     #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
-    #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.jpg" width="208" height="680"></img>', '<div style="mso-element:footnote-list"/>'))}
+    #{image_clean(word_body('<img src="cid:cb7b0d19-891e-4634-815a-570d019d454c.jpg" width="208" height="680"></img>', '<div style="mso-element:footnote-list"/>'))}
     #{image_clean(WORD_FTR3).gsub(/image\.png/, "image.jpg")}
     OUTPUT
   end
 
   it "resizes images with missing or auto sizes" do
@@ -611,11 +616,11 @@
     Html2Doc.process(html_input(simple_body), filename: "spec/test")
     testdoc = File.read("spec/test.doc", encoding: "utf-8")
     expect(testdoc).to match(%r{Content-Type: image/png})
     expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
     #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
-    #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.png" width="400" height="388"></img>', '<div style="mso-element:footnote-list"/>'))}
+    #{image_clean(word_body('<img src="cid:cb7b0d19-891e-4634-815a-570d019d454c.png" width="400" height="388"></img>', '<div style="mso-element:footnote-list"/>'))}
     #{image_clean(WORD_FTR3)}
     OUTPUT
   end
 
 =begin
@@ -788,10 +793,10 @@
   it "test image base64 image encoding" do
     simple_body = '<img src="19160-6.png">'
     Html2Doc.process(html_input(simple_body), filename: "spec/test", debug: true)
     testdoc = File.read("spec/test.doc", encoding: "utf-8")
     base64_image = testdoc[/image\/png\n\n(.*?)\n\n----/m, 1].gsub!("\n", "")
-    base64_image_basename = testdoc[%r{Content-Location: file:///C:/Doc/test_files/([0-9a-z\-]+)\.png}m, 1]
+    base64_image_basename = testdoc[%r{Content-ID: <([0-9a-z\-]+)\.png}m, 1]
     doc_bin_image = Base64.strict_decode64(base64_image)
     file_bin_image = IO.read("spec/test_files/#{base64_image_basename}.png", mode: "rb")
     expect(doc_bin_image).to eq file_bin_image
     FileUtils.rm_rf %w[spec/test_files spec/test.doc spec/test.htm]
   end