')} #{WORD_FTR1}
    OUTPUT
  end

  # After a run, only the final .doc should remain next to the input:
  # neither the intermediate test.htm nor the test_files entry may exist.
  it "removes any temp files" do
    # Start from a clean slate. The guard keeps this example from raising
    # Errno::ENOENT when it runs in isolation (or first) and no earlier
    # example has produced test.doc yet.
    File.delete("test.doc") if File.exist?("test.doc")
    Html2Doc.process(html_input(""), filename: "test")
    expect(File.exist?("test.doc")).to be true
    expect(File.exist?("test.htm")).to be false
    expect(File.exist?("test_files")).to be false
  end

  # Passes an explicit stylesheet path and compares the generated document
  # against the default header, stylesheet and body fixtures.
  it "processes a stylesheet in an HTML document with a title" do
    Html2Doc.process(html_input(""), filename: "test", stylesheet: "lib/html2doc/wordstyle.css")
    expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
      to match_fuzzy(<<~OUTPUT)
    #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body("", '

')} #{WORD_FTR1} OUTPUT end it "processes a stylesheet in an HTML document without a title" do Html2Doc.process(html_input_no_title(""), filename: "test", stylesheet: "lib/html2doc/wordstyle.css") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))). to match_fuzzy(<<~OUTPUT) #{WORD_HDR.sub("blank", "")} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body("", '

')} #{WORD_FTR1} OUTPUT end it "processes a stylesheet in an HTML document with an empty head" do Html2Doc.process(html_input_empty_head(""), filename: "test", stylesheet: "lib/html2doc/wordstyle.css") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))). to match_fuzzy(<<~OUTPUT) #{WORD_HDR.sub("blank", "")} #{DEFAULT_STYLESHEET} #{WORD_HDR_END.sub(''+"\n", "").sub("\n", "")} #{word_body("", '

')} #{WORD_FTR1} OUTPUT end it "processes a header" do Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header.html") html = guid_clean(File.read("test.doc", encoding: "utf-8")) hdr = Base64.decode64(html.sub(%r{^.*Content-Location: file:///C:/Doc/test_files/header.html}, ""). sub(%r{^.*Content-Type: text/html charset="utf-8"}m, ""). sub(%r{------=_NextPart_--.*$}m, "")).force_encoding("UTF-8") #expect(hdr.gsub(/\xa0/, " ")).to match_fuzzy(HEADERHTML) expect(HTMLEntities.new.encode(hdr, :hexadecimal). gsub(/\</, "<").gsub(/\>/, ">").gsub(/\'/, "'").gsub(/\"/, '"'). gsub(/\ /, " ").gsub(/\ /, "\n")).to match_fuzzy(HEADERHTML) expect(html.sub(%r{Content-Location: file:///C:/Doc/test_files/header.html.*$}m, "")). to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET.gsub(/FILENAME/, "test")} #{WORD_HDR_END} #{word_body("", '

')} #{WORD_FTR2}
    OUTPUT
  end

  # A header file that references an image: the generated document must
  # contain a PNG MIME part and a .png location under test_files.
  it "processes a header with an image" do
    Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img.html")
    output = guid_clean(File.read("test.doc", encoding: "utf-8"))
    expect(output).to match(%r{Content-Type: image/png})
    expect(output).to match(%r{file:///C:/Doc/test_files/[^.]+\.png})
  end

  # Same check as above, but the header is first rewritten so that its image
  # reference is an absolute filesystem path instead of a relative one.
  it "processes a header with an image with absolute path" do
    absolute_png = File.expand_path(File.join(File.dirname(__FILE__), "19160-6.png"))
    header_source = File.read("spec/header_img.html", encoding: "utf-8")
    # Write the rewritten header alongside the original fixture.
    File.open("spec/header_img1.html", "w:UTF-8") do |out|
      out.write header_source.sub(%r{spec/19160-6.png}, absolute_png)
    end
    Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img1.html")
    output = guid_clean(File.read("test.doc", encoding: "utf-8"))
    expect(output).to match(%r{Content-Type: image/png})
    expect(output).to match(%r{file:///C:/Doc/test_files/[^.]+\.png})
  end

  it "processes a populated document" do
    simple_body = "

Hello word!

This is a very simple document

" Html2Doc.process(html_input(simple_body), filename: "test") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))). to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body(simple_body, '

')} #{WORD_FTR1} OUTPUT end it "processes AsciiMath" do Html2Doc.process(html_input(%[

]), filename: "test", asciimathdelims: ["{{", "}}"]) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))). to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body("

#{ASCII_MATH}\"integer\"

", '

')} #{WORD_FTR1} OUTPUT end it "processes spaces in AsciiMath" do Html2Doc.process(html_input(%[

integer

", '

')} #{WORD_FTR1} OUTPUT end it "left-aligns AsciiMath" do Html2Doc.process(html_input("

"), filename: "test", asciimathdelims: ["{{", "}}"]) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))). to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body("

#{ASCII_MATH}

", '

')} #{WORD_FTR1} OUTPUT end it "right-aligns AsciiMath" do Html2Doc.process(html_input("

#{ASCII_MATH}

", '

')} #{WORD_FTR1} OUTPUT end it "wraps msup after munderover in MathML" do Html2Doc.process(html_input("

\sum_{i = 0}^{n} 2^{i}

i=0n2i

', '

')} #{WORD_FTR1} OUTPUT end it "processes tabs" do simple_body = "

Hello word!

This is a very &tab; simple document

')} #{WORD_FTR1} OUTPUT end it "makes unstyled paragraphs be MsoNormal" do simple_body = '

Hello word!

This is a very simple document

This style stays

' Html2Doc.process(html_input(simple_body), filename: "test") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))). to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body(simple_body.gsub(/

/, %[

]), '

')} #{WORD_FTR1} OUTPUT end it "makes unstyled list entries be MsoNormal" do simple_body = '

Hello word!

This is a very simple document
This style stays

/, %[

]), '

')} #{WORD_FTR1} OUTPUT end it "resizes images for height, in a file in a subdirectory" do simple_body = '

' Html2Doc.process(html_input(simple_body), filename: "spec/test") testdoc = File.read("spec/test.doc", encoding: "utf-8") expect(testdoc).to match(%r{Content-Type: image/png}) expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{image_clean(word_body('

', '

'))} #{image_clean(WORD_FTR3)} OUTPUT end it "resizes images for width" do simple_body = '

' Html2Doc.process(html_input(simple_body), filename: "test") testdoc = File.read("test.doc", encoding: "utf-8") expect(testdoc).to match(%r{Content-Type: image/gif}) expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{image_clean(word_body('

', '

'))} #{image_clean(WORD_FTR3).gsub(/image\.png/, "image.gif")} OUTPUT end it "resizes images for height" do simple_body = '

' Html2Doc.process(html_input(simple_body), filename: "test") testdoc = File.read("test.doc", encoding: "utf-8") expect(testdoc).to match(%r{Content-Type: image/jpeg}) expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{image_clean(word_body('

', '

'))} #{image_clean(WORD_FTR3).gsub(/image\.png/, "image.jpg")}
    OUTPUT
  end

  # Exercises Html2Doc.image_resize directly with a 100x100 bounding box and
  # every combination of missing, numeric and "auto" width/height attributes.
  it "resizes images with missing or auto sizes" do
    jpg = "spec/19160-8.jpg"
    # Each row: the size attributes present on the image, and the expected
    # [width, height] result. Rows are in the order the checks are made.
    scenarios = [
      [{}, [30, 100]],
      [{ "width" => "20" }, [20, 65]],
      [{ "height" => "50" }, [15, 50]],
      [{ "width" => "500" }, [30, 100]],
      [{ "height" => "500" }, [30, 100]],
      [{ "width" => "20", "height" => "auto" }, [20, 65]],
      [{ "width" => "auto", "height" => "50" }, [15, 50]],
      [{ "width" => "500", "height" => "auto" }, [30, 100]],
      [{ "width" => "auto", "height" => "500" }, [30, 100]],
      [{ "width" => "auto", "height" => "auto" }, [30, 100]],
    ]
    scenarios.each do |size_attrs, expected|
      image = { "src" => jpg }.merge(size_attrs)
      expect(Html2Doc.image_resize(image, jpg, 100, 100)).to eq expected
    end
  end

  it "does not move images if they are external URLs" do
    simple_body = '

' Html2Doc.process(html_input(simple_body), filename: "test") testdoc = File.read("test.doc", encoding: "utf-8") expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{image_clean(word_body('

', '

'))} #{image_clean(WORD_FTR1)} OUTPUT end it "deals with absolute image locations" do simple_body = %{

} Html2Doc.process(html_input(simple_body), filename: "spec/test") testdoc = File.read("spec/test.doc", encoding: "utf-8") expect(testdoc).to match(%r{Content-Type: image/png}) expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{image_clean(word_body('

', '

'))} #{image_clean(WORD_FTR3)} OUTPUT end =begin it "warns about SVG" do simple_body = '

' expect{ Html2Doc.process(html_input(simple_body), filename: "test") }.to output("https://example.com/19160-6.svg: SVG not supported\n").to_stderr end =end it "processes epub:type footnotes" do simple_body = '

This is a very simple document1 allegedly2

This is a very simple document allegedly

', '

Footnote

Other Footnote

')} #{WORD_FTR1} OUTPUT end it "processes class footnotes" do simple_body = '

This is a very simple document1 allegedly2

This is a very simple document allegedly

', '

Footnote

Other Footnote

')} #{WORD_FTR1} OUTPUT end it "processes footnotes with text wrapping the footnote reference" do simple_body = '

This is a very simple document(1) allegedly2

This is a very simple document() allegedly

', '

()Footnote

Other Footnote

')} #{WORD_FTR1} OUTPUT end it "extracts paragraphs from footnotes" do simple_body = '

This is a very simple document1 allegedly2

Other Footnote

This is a very simple document allegedly

', '

Footnote

Other Footnote

')} #{WORD_FTR1} OUTPUT end it "labels lists with list styles" do simple_body = <<~BODY

1. - 1. A
      B
      B2
      C

BODY Html2Doc.process(html_input(simple_body), filename: "test", liststyles: {ul: "l1", ol: "l2"}) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))). to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('

', '

')} #{WORD_FTR1} OUTPUT end it "restarts numbering of lists with list styles" do simple_body = <<~BODY

1. - 1. A

1. - 1. A

', '

')} #{WORD_FTR1} OUTPUT end it "labels lists with multiple list styles" do simple_body = <<~BODY

1. - 1. A
      B
      B2
      C

1. - 1. A
      B
      B2
      C

1. - 1. A
      B
      B2
      C

BODY Html2Doc.process(html_input(simple_body), filename: "test", liststyles: {ul: "l1", ol: "l2", steps: "l3"}) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))). to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('

', '

')} #{WORD_FTR1} OUTPUT end it "replaces id attributes with explicit a@name bookmarks" do simple_body = <<~BODY

Hello

', '

')} #{WORD_FTR1}
    OUTPUT
  end

  # Runs with debug: true, then checks that the base64 PNG payload embedded
  # in the .doc decodes to exactly the bytes of the PNG under spec/test_files.
  it "test image base64 image encoding" do
    # NOTE(review): this literal is blank in this copy of the file, yet the
    # example expects an image/png part — presumably image markup belongs
    # here; confirm against version control.
    simple_body = '

'
    Html2Doc.process(html_input(simple_body), filename: "spec/test", debug: true)
    testdoc = File.read("spec/test.doc", encoding: "utf-8")
    # Join the wrapped base64 lines. The non-bang call is deliberate: gsub!
    # returns nil when nothing is replaced, which would hand nil to
    # strict_decode64 if the payload happened to fit on a single line.
    base64_image = testdoc[/image\/png\n\n(.*?)\n\n----/m, 1].delete("\n")
    base64_image_basename = testdoc[%r{Content-Location: file:///C:/Doc/test_files/([0-9a-z\-]+)\.png}m, 1]
    doc_bin_image = Base64.strict_decode64(base64_image)
    file_bin_image = IO.read("spec/test_files/#{base64_image_basename}.png", mode: "rb")
    expect(doc_bin_image).to eq file_bin_image
    # Remove the output and the intermediate files written under spec/.
    FileUtils.rm_rf %w[spec/test_files spec/test.doc spec/test.htm]
  end
end