')} #{WORD_FTR1} OUTPUT end it "processes a blank document" do Html2Doc.process(html_input(""), filename: "test") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('', '

')} #{WORD_FTR1} OUTPUT end

  # After a conversion only test.doc is expected to exist; test.htm and
  # test_files must not be present.
  it "removes any temp files" do
    # Start from a clean slate. FileUtils.rm_f, unlike File.delete, does not
    # raise when test.doc is absent, so this example no longer depends on an
    # earlier example having produced the file (isolated / random-order runs).
    FileUtils.rm_f("test.doc")
    Html2Doc.process(html_input(""), filename: "test")
    expect(File.exist?("test.doc")).to be true
    expect(File.exist?("test.htm")).to be false
    expect(File.exist?("test_files")).to be false
  end

  it "processes a stylesheet in an HTML document with a title" do Html2Doc.process(html_input(""), filename: "test", stylesheet: "lib/html2doc/wordstyle.css") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('', '

')} #{WORD_FTR1} OUTPUT end it "processes a stylesheet in an HTML document without a title" do Html2Doc.process(html_input_no_title(""), filename: "test", stylesheet: "lib/html2doc/wordstyle.css") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR.sub('blank', '')} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('', '

')} #{WORD_FTR1} OUTPUT end it "processes a stylesheet in an HTML document with an empty head" do Html2Doc.process(html_input_empty_head(""), filename: "test", stylesheet: "lib/html2doc/wordstyle.css") word_hdr_end = WORD_HDR_END .sub(%(\n), "") .sub("\n", "") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR.sub('blank', '')} #{DEFAULT_STYLESHEET} #{word_hdr_end} #{word_body('', '

')} #{WORD_FTR1} OUTPUT end it "processes a header" do Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header.html") html = guid_clean(File.read("test.doc", encoding: "utf-8")) hdr = Base64.decode64( html .sub(%r{^.*Content-Location: file:///C:/Doc/test_files/header.html}, "") .sub(%r{^.*Content-Type: text/html charset="utf-8"}m, "") .sub(%r{------=_NextPart_--.*$}m, ""), ).force_encoding("UTF-8") # expect(hdr.gsub(/\xa0/, " ")).to match_fuzzy(HEADERHTML) expect(HTMLEntities.new.encode(hdr, :hexadecimal) .gsub(/</, "<").gsub(/>/, ">") .gsub(/'/, "'").gsub(/"/, '"') .gsub(/ /, " ").gsub(/ /, "\n")) .to match_fuzzy(HEADERHTML) expect(html.sub(%r{Content-ID: .*$}m, "")) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET.gsub(/url$"[^"]+"$/, 'url(cid:header.html)')} #{WORD_HDR_END} #{word_body('', '

')} #{WORD_FTR2} OUTPUT end it "processes a header with an image" do Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img.html") doc = guid_clean(File.read("test.doc", encoding: "utf-8")) expect(doc).to match(%r{Content-Type: image/png}) expect(doc).to match(%r{iVBORw0KGgoAAAANSUhEUgAAA5cAAAN7CAYAAADRE24cAAAgAElEQVR4XuydB5gUxdaGC65gTogB}) end it "processes a header with an image with absolute path" do doc = File.read("spec/header_img.html", encoding: "utf-8") File.open("spec/header_img1.html", "w:UTF-8") do |f| f.write( doc.sub(%r{spec/19160-6.png}, File.expand_path(File.join(File.dirname(__FILE__), "19160-6.png"))), ) end Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img1.html") doc = guid_clean(File.read("test.doc", encoding: "utf-8")) expect(doc).to match(%r{Content-Type: image/png}) expect(doc).to match(%r{iVBORw0KGgoAAAANSUhEUgAAA5cAAAN7CAYAAADRE24cAAAgAElEQVR4XuydB5gUxdaGC65gTogB}) end it "processes a populated document" do simple_body = "

Hello word!

This is a very simple document

" Html2Doc.process(html_input(simple_body), filename: "test") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body(simple_body, '

')} #{WORD_FTR1} OUTPUT end it "processes AsciiMath" do Html2Doc.process(html_input(%[

]), filename: "test", asciimathdelims: ["{{", "}}"]) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body(%{

i=1ni3=nn+122"integer")

}, '

')} #{WORD_FTR1} OUTPUT end it "processes mstyle" do Html2Doc.process(html_input(%[

{{bb (-log_2 (p_u)) bb "BB" bbb "BBB" cc "CC" bcc "BCC" tt "TT" fr "FR" bfr "BFR" sf "SF" bsf "BSFα" sfi "SFI" sfbi "SFBIα" bii "BII" ii "II"}}

−log2puBB𝔹𝔹𝔹𝒞𝒞𝓑𝓒𝓒𝚃𝚃𝔉ℜ𝕭𝕱𝕽𝖲𝖥𝗕𝗦𝗙𝝰𝖲𝖥𝖨𝙎𝙁𝘽𝙄𝞪BIIII

}, '

')} #{WORD_FTR1} OUTPUT end it "processes spaces in AsciiMath" do Html2Doc.process(html_input(%[

text integer )

', '

')} #{WORD_FTR1} OUTPUT end it "processes spaces in MathML mtext" do Html2Doc.process(html_input("

H original J

"), filename: "test", asciimathdelims: ["{{", "}}"]) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('

H original J

', '

')} #{WORD_FTR1} OUTPUT end it "unwraps accent in MathML" do Html2Doc.process(html_input("

\hat{p}

', '

')} #{WORD_FTR1} OUTPUT end it "left-aligns AsciiMath" do Html2Doc.process(html_input("

i=1ni3=nn+122

}, '

')} #{WORD_FTR1} OUTPUT end it "right-aligns AsciiMath" do Html2Doc.process(html_input("

i=1ni3=nn+122

}, '

')} #{WORD_FTR1} OUTPUT end it "raises error in processing of broken AsciiMath" do begin expect do Html2Doc.process(html_input(%[

]), filename: "test", asciimathdelims: ["{{", "}}"]) end.to output('parsing: u_c = 6.6"unitsml(kHz)').to_stderr rescue StandardError end expect do Html2Doc.process(html_input(%[

]), filename: "test", asciimathdelims: ["{{", "}}"]) end.to raise_error(StandardError) end it "wraps msup after munderover in MathML" do Html2Doc.process(html_input("

\sum_{i = 0}^{n} 2^{i}

i=0n2i

', '

')} #{WORD_FTR1} OUTPUT end it "processes tabs" do simple_body = "

Hello word!

This is a very &tab; simple document

')} #{WORD_FTR1} OUTPUT end it "makes unstyled paragraphs be MsoNormal" do simple_body = '

Hello word!

This is a very simple document

This style stays

' Html2Doc.process(html_input(simple_body), filename: "test") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body(simple_body.gsub(/

/, %[

]), '

')} #{WORD_FTR1} OUTPUT end it "makes unstyled list entries be MsoNormal" do simple_body = '

Hello word!

This is a very simple document
This style stays

/, %[

]), '

')} #{WORD_FTR1} OUTPUT end it "resizes images for height, in a file in a subdirectory" do simple_body = '

' Html2Doc.process(html_input(simple_body), filename: "spec/test", imagedir: "spec") testdoc = File.read("spec/test.doc", encoding: "utf-8") expect(testdoc).to match(%r{Content-Type: image/png}) expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{image_clean(word_body('

', '

'))} #{image_clean(WORD_FTR3)} OUTPUT end it "resizes images for width" do simple_body = '

' Html2Doc.process(html_input(simple_body), filename: "test", imagedir: ".") testdoc = File.read("test.doc", encoding: "utf-8") expect(testdoc).to match(%r{Content-Type: image/gif}) expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{image_clean(word_body('

', '

'))} #{image_clean(WORD_FTR3).gsub(/image\.png/, 'image.gif')} OUTPUT end it "resizes images for height" do simple_body = '

' Html2Doc.process(html_input(simple_body), filename: "test", imagedir: ".") testdoc = File.read("test.doc", encoding: "utf-8") expect(testdoc).to match(%r{Content-Type: image/jpeg}) expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{image_clean(word_body('

', '

'))} #{image_clean(WORD_FTR3).gsub(/image\.png/, 'image.jpg')} OUTPUT end

  # Drives Html2Doc.image_resize through a sequence of width/height attribute
  # combinations (missing, numeric, "auto") on one and the same image hash.
  # Judging by the expectations the result is [width, height]; the two 100
  # arguments are presumably the maximum bounds -- confirm against
  # Html2Doc.image_resize.
  it "resizes images with missing or auto sizes" do
    image = { "src" => "spec/19160-8.jpg" }

    # Each step: attribute changes applied to the shared hash (nil means
    # "delete the attribute"), followed by the expected result. The order of
    # steps and of changes within a step mirrors the mutation sequence exactly.
    steps = [
      [{}, [30, 100]],
      [{ "width" => "20" }, [20, 65]],
      [{ "width" => nil, "height" => "50" }, [15, 50]],
      [{ "height" => nil, "width" => "500" }, [30, 100]],
      [{ "width" => nil, "height" => "500" }, [30, 100]],
      [{ "width" => "20", "height" => "auto" }, [20, 65]],
      [{ "width" => "auto", "height" => "50" }, [15, 50]],
      [{ "width" => "500", "height" => "auto" }, [30, 100]],
      [{ "width" => "auto", "height" => "500" }, [30, 100]],
      [{ "width" => "auto", "height" => "auto" }, [30, 100]],
    ]

    steps.each do |changes, expected|
      changes.each do |attr, value|
        if value.nil?
          image.delete(attr)
        else
          image[attr] = value
        end
      end
      expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100))
        .to eq expected
    end
  end

  it "does not move images if they are external URLs" do simple_body = '

' Html2Doc.process(html_input(simple_body), filename: "test", imagedir: ".") testdoc = File.read("test.doc", encoding: "utf-8") expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{image_clean(word_body('

', '

'))} #{image_clean(WORD_FTR1)} OUTPUT end it "deals with absolute image locations" do simple_body = %{

} Html2Doc.process(html_input(simple_body), filename: "spec/test", imagedir: ".") testdoc = File.read("spec/test.doc", encoding: "utf-8") expect(testdoc).to match(%r{Content-Type: image/png}) expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{image_clean(word_body('

', '

'))} #{image_clean(WORD_FTR3)} OUTPUT end # it "warns about SVG" do # simple_body = '

' # expect{ Html2Doc.process(html_input(simple_body), filename: "test") } # .to output("https://example.com/19160-6.svg: SVG not supported\n").to_stderr # end it "processes epub:type footnotes" do simple_body = '

This is a very simple document1 allegedly2

This is a very simple document allegedly

', '

Footnote

Other Footnote

')} #{WORD_FTR1} OUTPUT end it "processes class footnotes" do simple_body = '

This is a very simple document1 allegedly2

This is a very simple document allegedly

', '

Footnote

Other Footnote

')} #{WORD_FTR1} OUTPUT end it "processes footnotes with text wrapping the footnote reference" do simple_body = '

This is a very simple document(1) allegedly2

This is a very simple document() allegedly

', '

()Footnote

Other Footnote

')} #{WORD_FTR1} OUTPUT end it "extracts paragraphs from footnotes" do simple_body = '

This is a very simple document1 allegedly2

Other Footnote

This is a very simple document allegedly

', '

Footnote

Other Footnote

')} #{WORD_FTR1} OUTPUT end it "labels lists with list styles" do simple_body = <<~BODY

1. - 1. A
      B
      B2
      C

')} #{WORD_FTR1} OUTPUT end it "restarts numbering of lists with list styles" do simple_body = <<~BODY

1. - 1. A

1. - 1. A

BODY Html2Doc.process(html_input(simple_body), filename: "test", liststyles: { ul: "l1", ol: "l2" }) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('

', '

')} #{WORD_FTR1} OUTPUT end it "labels lists with multiple list styles" do simple_body = <<~BODY

1. - 1. A
      B
      B2
      C

1. - 1. A
      B
      B2
      C

1. - 1. A
      B
      B2
      C

BODY Html2Doc.process(html_input(simple_body), filename: "test", liststyles: { ul: "l1", ol: "l2", steps: "l3" }) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('

', '

')} #{WORD_FTR1} OUTPUT end it "replaces id attributes with explicit a@name bookmarks" do simple_body = <<~BODY

Hello

', '

')} #{WORD_FTR1} OUTPUT end it "test image base64 image encoding" do simple_body = '

' Html2Doc.process(html_input(simple_body), filename: "spec/test", debug: true, imagedir: "spec") testdoc = File.read("spec/test.doc", encoding: "utf-8") base64_image = testdoc[/image\/png\n\n(.*?)\n\n----/m, 1].gsub!("\n", "") base64_image_basename = testdoc[%r{Content-ID: <([0-9a-z\-]+)\.png}m, 1] doc_bin_image = Base64.strict_decode64(base64_image) file_bin_image = IO .read("spec/test_files/#{base64_image_basename}.png", mode: "rb") expect(doc_bin_image).to eq file_bin_image FileUtils.rm_rf %w[spec/test_files spec/test.doc spec/test.htm] end end