require "base64" def html_input(xml) <<~HTML blank #{xml} HTML end def html_input_no_title(xml) <<~HTML #{xml} HTML end def html_input_empty_head(xml) <<~HTML #{xml} HTML end WORD_HDR = <<~HDR.freeze MIME-Version: 1.0 Content-Type: multipart/related; boundary="----=_NextPart_" ------=_NextPart_ Content-ID: Content-Disposition: inline; filename="test.htm" Content-Type: text/html; charset="utf-8" Print 100 blank HDR def word_body(xml, footnote) <<~BODY #{xml} #{footnote} BODY end WORD_FTR1 = <<~FTR.freeze ------=_NextPart_ Content-ID: Content-Disposition: inline; filename="filelist.xml" Content-Transfer-Encoding: base64 Content-Type: #{Html2Doc::mime_type('filelist.xml')} PHhtbCB4bWxuczpvPSJ1cm46c2NoZW1hcy1taWNyb3NvZnQtY29tOm9mZmljZTpvZmZpY2UiPgog ICAgICAgIDxvOk1haW5GaWxlIEhSZWY9Ii4uL3Rlc3QuaHRtIi8+ICA8bzpGaWxlIEhSZWY9ImZp bGVsaXN0LnhtbCIvPgo8L3htbD4K ------=_NextPart_-- FTR WORD_FTR2 = <<~FTR.freeze ------=_NextPart_ Content-ID: Content-Disposition: inline; filename="filelist.xml" Content-Transfer-Encoding: base64 Content-Type: #{Html2Doc::mime_type('filelist.xml')} PHhtbCB4bWxuczpvPSJ1cm46c2NoZW1hcy1taWNyb3NvZnQtY29tOm9mZmljZTpvZmZpY2UiPgog ICAgICAgIDxvOk1haW5GaWxlIEhSZWY9Ii4uL3Rlc3QuaHRtIi8+ICA8bzpGaWxlIEhSZWY9ImZp bGVsaXN0LnhtbCIvPgogIDxvOkZpbGUgSFJlZj0iaGVhZGVyLmh0bWwiLz4KPC94bWw+Cg== ------=_NextPart_ FTR WORD_FTR3 = <<~FTR.freeze ------=_NextPart_ Content-ID: Content-Disposition: inline; filename="filelist.xml" Content-Transfer-Encoding: base64 Content-Type: #{Html2Doc::mime_type('filelist.xml')} PHhtbCB4bWxuczpvPSJ1cm46c2NoZW1hcy1taWNyb3NvZnQtY29tOm9mZmljZTpvZmZpY2UiPgog ICAgICAgIDxvOk1haW5GaWxlIEhSZWY9Ii4uL3Rlc3QuaHRtIi8+ICA8bzpGaWxlIEhSZWY9IjFh YzIwNjVmLTAzZjAtNGM3YS1iOWE2LTkyZTgyMDU5MWJmMC5wbmciLz4KICA8bzpGaWxlIEhSZWY9 ImZpbGVsaXN0LnhtbCIvPgo8L3htbD4K ------=_NextPart_ Content-ID: Content-Disposition: inline; filename="cb7b0d19-891e-4634-815a-570d019d454c.png" Content-Transfer-Encoding: base64 Content-Type: image/png ------=_NextPart_-- FTR HEADERHTML = 
<<~FTR.freeze





ISO/IEC&nbsp;CD 17301-1:2016(E)

© ISO/IEC&nbsp;2016&nbsp;– All rights reserved

2                                                                                                                                                                           © ISO/IEC&nbsp;2016&nbsp;– All rights reserved

ISO/IEC&nbsp;CD 17301-1:2016(E)

ISO/IEC&nbsp;CD 17301-1:2016(E)

ii                                                                                                                                                                           © ISO/IEC&nbsp;2016&nbsp;– All rights reserved

© ISO/IEC&nbsp;2016&nbsp;– All rights reserved                                                                                                                                                                          iii

2                                                                                                                                                                           © ISO/IEC&nbsp;2016&nbsp;– All rights reserved

© ISO/IEC&nbsp;2016&nbsp;– All rights reserved                                                                                                                                                                           3

FTR ASCII_MATH = 'i=1ni3=nn+122'.freeze DEFAULT_STYLESHEET = File.read("lib/html2doc/wordstyle.css", encoding: "utf-8").freeze def guid_clean(xml) xml.gsub(/NextPart_[0-9a-f.]+/, "NextPart_") end def image_clean(xml) xml.gsub(%r{[0-9a-f-]+\.png}, "image.png") .gsub(%r{[0-9a-f-]+\.gif}, "image.gif") .gsub(%r{[0-9a-f-]+\.(jpeg|jpg)}, "image.jpg") .gsub(%r{------=_NextPart_\s+Content-Location: file:///C:/Doc/test_files/image\.(png|gif).*?\s-----=_NextPart_}m, "------=_NextPart_") .gsub(%r{Content-Type: image/(png|gif|jpeg)[^-]*------=_NextPart_-?-?}m, "") .gsub(%r{ICAgICAg[^-]*-----}m, "-----") .gsub(%r{\s*\s*}m, "") .gsub(%r{\s*}m, "") end RSpec.describe Html2Doc do it "has a version number" do expect(Html2Doc::VERSION).not_to be nil end it "preserves Word HTML directives" do Html2Doc.process(html_input(%[AB]), filename: "test") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body(%{AB}, '
')} #{WORD_FTR1} OUTPUT end it "processes a blank document" do Html2Doc.process(html_input(""), filename: "test") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('', '
')}
      #{WORD_FTR1}
    OUTPUT
  end

  # After processing, only test.doc is expected to exist; the intermediate
  # test.htm and test_files must be gone.
  it "removes any temp files" do
    # Start from a clean slate. The file is legitimately absent when this
    # example runs on its own or before any other example, and an
    # unconditional File.delete would raise Errno::ENOENT in that case.
    File.delete("test.doc") if File.exist?("test.doc")
    Html2Doc.process(html_input(""), filename: "test")
    expect(File.exist?("test.doc")).to be true
    expect(File.exist?("test.htm")).to be false
    expect(File.exist?("test_files")).to be false
  end

  it "processes a stylesheet in an HTML document with a title" do
    Html2Doc.process(html_input(""), filename: "test",
                                     stylesheet: "lib/html2doc/wordstyle.css")
    expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
      .to match_fuzzy(<<~OUTPUT)
        #{WORD_HDR}
        #{DEFAULT_STYLESHEET}
        #{WORD_HDR_END}
        #{word_body('', '
')} #{WORD_FTR1} OUTPUT end it "processes a stylesheet in an HTML document without a title" do Html2Doc.process(html_input_no_title(""), filename: "test", stylesheet: "lib/html2doc/wordstyle.css") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR.sub('blank', '')} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('', '
')} #{WORD_FTR1} OUTPUT end it "processes a stylesheet in an HTML document with an empty head" do Html2Doc.process(html_input_empty_head(""), filename: "test", stylesheet: "lib/html2doc/wordstyle.css") word_hdr_end = WORD_HDR_END .sub(%(\n), "") .sub("\n", "") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR.sub('blank', '')} #{DEFAULT_STYLESHEET} #{word_hdr_end} #{word_body('', '
')} #{WORD_FTR1} OUTPUT end it "processes a header" do Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header.html") html = guid_clean(File.read("test.doc", encoding: "utf-8")) hdr = Base64.decode64( html .sub(%r{^.*Content-Location: file:///C:/Doc/test_files/header.html}, "") .sub(%r{^.*Content-Type: text/html charset="utf-8"}m, "") .sub(%r{------=_NextPart_--.*$}m, ""), ).force_encoding("UTF-8") # expect(hdr.gsub(/\xa0/, " ")).to match_fuzzy(HEADERHTML) expect(HTMLEntities.new.encode(hdr, :hexadecimal) .gsub(/</, "<").gsub(/>/, ">") .gsub(/'/, "'").gsub(/"/, '"') .gsub(/ /, " ").gsub(/ /, "\n")) .to match_fuzzy(HEADERHTML) expect(html.sub(%r{Content-ID: .*$}m, "")) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET.gsub(/url\("[^"]+"\)/, 'url(cid:header.html)')} #{WORD_HDR_END} #{word_body('', '
')}
      #{WORD_FTR2}
    OUTPUT
  end

  # The header file references a PNG; the output must contain an image/png
  # part whose base64 payload starts with the expected bytes.
  it "processes a header with an image" do
    Html2Doc.process(html_input(""), filename: "test",
                                     header_file: "spec/header_img.html")
    doc = guid_clean(File.read("test.doc", encoding: "utf-8"))
    expect(doc).to match(%r{Content-Type: image/png})
    expect(doc).to match(%r{iVBORw0KGgoAAAANSUhEUgAAA5cAAAN7CAYAAADRE24cAAAgAElEQVR4XuydB5gUxdaGC65gTogB})
  end

  # Same as above, but the header file is first rewritten (as
  # spec/header_img1.html) so that the image is referenced by absolute path.
  it "processes a header with an image with absolute path" do
    template = File.read("spec/header_img.html", encoding: "utf-8")
    absolute_png = File.expand_path("19160-6.png", File.dirname(__FILE__))
    # Block form of sub: the path is inserted literally, so a backslash in
    # it can never be interpreted as a replacement escape.
    File.write("spec/header_img1.html",
               template.sub("spec/19160-6.png") { absolute_png },
               encoding: "UTF-8")
    Html2Doc.process(html_input(""), filename: "test",
                                     header_file: "spec/header_img1.html")
    output = guid_clean(File.read("test.doc", encoding: "utf-8"))
    expect(output).to match(%r{Content-Type: image/png})
    expect(output).to match(%r{iVBORw0KGgoAAAANSUhEUgAAA5cAAAN7CAYAAADRE24cAAAgAElEQVR4XuydB5gUxdaGC65gTogB})
  end

  it "processes a populated document" do
    simple_body = "

Hello word!

This is a very simple document
" Html2Doc.process(html_input(simple_body), filename: "test") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body(simple_body, '
')} #{WORD_FTR1} OUTPUT end it "processes AsciiMath" do Html2Doc.process(html_input(%[
{{sum_(i=1)^n i^3=((n(n+1))/2)^2 text("integer"))}}
]), filename: "test", asciimathdelims: ["{{", "}}"]) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body(%{
i=1ni3=nn+122"integer")
}, '
')} #{WORD_FTR1} OUTPUT end it "processes mstyle" do Html2Doc.process(html_input(%[
{{bb (-log_2 (p_u)) bb "BB" bbb "BBB" cc "CC" bcc "BCC" tt "TT" fr "FR" bfr "BFR" sf "SF" bsf "BSFα" sfi "SFI" sfbi "SFBIα" bii "BII" ii "II"}}
]), filename: "test", asciimathdelims: ["{{", "}}"]) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body(%{
log2puBB𝔹𝔹𝔹𝒞𝒞𝓑𝓒𝓒𝚃𝚃𝔉ℜ𝕭𝕱𝕽𝖲𝖥𝗕𝗦𝗙𝝰𝖲𝖥𝖨𝙎𝙁𝘽𝙄𝞪BIIII
}, '
')} #{WORD_FTR1} OUTPUT end it "processes spaces in AsciiMath" do Html2Doc.process(html_input(%[
{{text " integer ")}}
]), filename: "test", asciimathdelims: ["{{", "}}"]) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('
text integer )
', '
')} #{WORD_FTR1} OUTPUT end it "processes spaces in MathML mtext" do Html2Doc.process(html_input("
H original J
"), filename: "test", asciimathdelims: ["{{", "}}"]) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('
H original J
', '
')} #{WORD_FTR1} OUTPUT end it "unwraps accent in MathML" do Html2Doc.process(html_input("
p^
"), filename: "test", asciimathdelims: ["{{", "}}"]) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('
p
', '
')} #{WORD_FTR1} OUTPUT end it "left-aligns AsciiMath" do Html2Doc.process(html_input("
{{sum_(i=1)^n i^3=((n(n+1))/2)^2}}
"), filename: "test", asciimathdelims: ["{{", "}}"]) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body(%{
i=1ni3=nn+122
}, '
')} #{WORD_FTR1} OUTPUT end it "right-aligns AsciiMath" do Html2Doc.process(html_input("
{{sum_(i=1)^n i^3=((n(n+1))/2)^2}}
"), filename: "test", asciimathdelims: ["{{", "}}"]) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body(%{
i=1ni3=nn+122
}, '
')} #{WORD_FTR1} OUTPUT end it "raises error in processing of broken AsciiMath" do begin expect do Html2Doc.process(html_input(%[
{{u_c = 6.6"unitsml(kHz)}}
]), filename: "test", asciimathdelims: ["{{", "}}"]) end.to output('parsing: u_c = 6.6"unitsml(kHz)').to_stderr rescue StandardError end expect do Html2Doc.process(html_input(%[
{{u_c = 6.6"unitsml(kHz)}}
]), filename: "test", asciimathdelims: ["{{", "}}"]) end.to raise_error(StandardError) end it "wraps msup after munderover in MathML" do Html2Doc.process(html_input("
i=0n2i
"), filename: "test", asciimathdelims: ["{{", "}}"]) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('
i=0n2i
', '
')} #{WORD_FTR1} OUTPUT end it "processes tabs" do simple_body = "

Hello word!

This is a very &tab; simple document
" Html2Doc.process(html_input(simple_body), filename: "test") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body(simple_body.gsub(/&tab;/, %[  ]), '
')} #{WORD_FTR1} OUTPUT end it "makes unstyled paragraphs be MsoNormal" do simple_body = '

Hello word!

This is a very simple document

This style stays

' Html2Doc.process(html_input(simple_body), filename: "test") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body(simple_body.gsub(/

/, %[

]), '

')} #{WORD_FTR1} OUTPUT end it "makes unstyled list entries be MsoNormal" do simple_body = '

Hello word!

  • This is a very simple document
  • This style stays
' Html2Doc.process(html_input(simple_body), filename: "test") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body(simple_body.gsub(/
  • /, %[
  • ]), '
    ')} #{WORD_FTR1} OUTPUT end it "resizes images for height, in a file in a subdirectory" do simple_body = '' Html2Doc.process(html_input(simple_body), filename: "spec/test", imagedir: "spec") testdoc = File.read("spec/test.doc", encoding: "utf-8") expect(testdoc).to match(%r{Content-Type: image/png}) expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{image_clean(word_body('', '
    '))} #{image_clean(WORD_FTR3)} OUTPUT end it "resizes images for width" do simple_body = '' Html2Doc.process(html_input(simple_body), filename: "test", imagedir: ".") testdoc = File.read("test.doc", encoding: "utf-8") expect(testdoc).to match(%r{Content-Type: image/gif}) expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{image_clean(word_body('', '
    '))} #{image_clean(WORD_FTR3).gsub(/image\.png/, 'image.gif')} OUTPUT end it "resizes images for height" do simple_body = '' Html2Doc.process(html_input(simple_body), filename: "test", imagedir: ".") testdoc = File.read("test.doc", encoding: "utf-8") expect(testdoc).to match(%r{Content-Type: image/jpeg}) expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{image_clean(word_body('', '
    '))}
      #{image_clean(WORD_FTR3).gsub(/image\.png/, 'image.jpg')}
    OUTPUT
  end

  # Calls Html2Doc.image_resize on spec/19160-8.jpg with 100, 100 as the last
  # two arguments, once per combination of width/height attributes listed in
  # the table, and checks the returned pair.
  # NOTE(review): each case now receives a fresh attribute hash instead of
  # one hash mutated step by step; this assumes image_resize does not modify
  # the hash it is given -- confirm.
  it "resizes images with missing or auto sizes" do
    path = "spec/19160-8.jpg"
    cases = [
      [{}, [30, 100]],
      [{ "width" => "20" }, [20, 65]],
      [{ "height" => "50" }, [15, 50]],
      [{ "width" => "500" }, [30, 100]],
      [{ "height" => "500" }, [30, 100]],
      [{ "width" => "20", "height" => "auto" }, [20, 65]],
      [{ "width" => "auto", "height" => "50" }, [15, 50]],
      [{ "width" => "500", "height" => "auto" }, [30, 100]],
      [{ "width" => "auto", "height" => "500" }, [30, 100]],
      [{ "width" => "auto", "height" => "auto" }, [30, 100]],
    ]
    cases.each do |attrs, expected|
      image = { "src" => path }.merge(attrs)
      expect(Html2Doc.image_resize(image, path, 100, 100))
        .to eq(expected), "unexpected size for attributes #{attrs.inspect}"
    end
  end

  it "does not move images if they are external URLs" do
    simple_body = ''
    Html2Doc.process(html_input(simple_body), filename: "test", imagedir: ".")
    testdoc = File.read("test.doc", encoding: "utf-8")
    expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
      #{WORD_HDR}
      #{DEFAULT_STYLESHEET}
      #{WORD_HDR_END}
      #{image_clean(word_body('', '
    '))} #{image_clean(WORD_FTR1)} OUTPUT end it "deals with absolute image locations" do simple_body = %{} Html2Doc.process(html_input(simple_body), filename: "spec/test", imagedir: ".") testdoc = File.read("spec/test.doc", encoding: "utf-8") expect(testdoc).to match(%r{Content-Type: image/png}) expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{image_clean(word_body('', '
    '))} #{image_clean(WORD_FTR3)} OUTPUT end # it "warns about SVG" do # simple_body = '' # expect{ Html2Doc.process(html_input(simple_body), filename: "test") } # .to output("https://example.com/19160-6.svg: SVG not supported\n").to_stderr # end it "processes epub:type footnotes" do simple_body = '
    This is a very simple document1 allegedly2
    ' Html2Doc.process(html_input(simple_body), filename: "test") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('
    This is a very simple document allegedly
    ', '

    Footnote

    Other Footnote

    ')} #{WORD_FTR1} OUTPUT end it "processes class footnotes" do simple_body = '
    This is a very simple document1 allegedly2
    ' Html2Doc.process(html_input(simple_body), filename: "test") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('
    This is a very simple document allegedly
    ', '

    Footnote

    Other Footnote

    ')} #{WORD_FTR1} OUTPUT end it "processes footnotes with text wrapping the footnote reference" do simple_body = '
    This is a very simple document(1) allegedly2
    ' Html2Doc.process(html_input(simple_body), filename: "test") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('
    This is a very simple document() allegedly
    ', '

    ()Footnote

    Other Footnote

    ')} #{WORD_FTR1} OUTPUT end it "extracts paragraphs from footnotes" do simple_body = '
    This is a very simple document1 allegedly2

    Other Footnote

    ' Html2Doc.process(html_input(simple_body), filename: "test") expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('
    This is a very simple document allegedly
    ', '

    Footnote

    Other Footnote

    ')} #{WORD_FTR1} OUTPUT end it "labels lists with list styles" do simple_body = <<~BODY
            1. A
            2. B

              B2

            3. C

      • C
      BODY Html2Doc.process(html_input(simple_body), filename: "test", liststyles: { ul: "l1", ol: "l2" }) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('

      A

      B

      B2

      C

      C

      ', '
      ')} #{WORD_FTR1} OUTPUT end it "restarts numbering of lists with list styles" do simple_body = <<~BODY
              1. A

              1. A

      BODY Html2Doc.process(html_input(simple_body), filename: "test", liststyles: { ul: "l1", ol: "l2" }) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('

      A

      A

      ', '
      ')} #{WORD_FTR1} OUTPUT end it "labels lists with multiple list styles" do simple_body = <<~BODY
              1. A
              2. B

                B2

              3. C

              1. A
              2. B

                B2

              3. C

              1. A
              2. B

                B2

              3. C

      BODY Html2Doc.process(html_input(simple_body), filename: "test", liststyles: { ul: "l1", ol: "l2", steps: "l3" }) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('

      A

      B

      B2

      C

      A

      B

      B2

      C

      A

      B

      B2

      C

      ', '
      ')} #{WORD_FTR1} OUTPUT end it "replaces id attributes with explicit a@name bookmarks" do simple_body = <<~BODY

      Hello

      BODY Html2Doc.process(html_input(simple_body), filename: "test", liststyles: { ul: "l1", ol: "l2" }) expect(guid_clean(File.read("test.doc", encoding: "utf-8"))) .to match_fuzzy(<<~OUTPUT) #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END} #{word_body('

      Hello

      ', '
      ')}
      #{WORD_FTR1}
    OUTPUT
  end

  # Runs Html2Doc.process with debug: true, then checks that the base64
  # payload of the PNG part in spec/test.doc decodes to exactly the bytes of
  # the PNG found under spec/test_files.
  # NOTE(review): simple_body is empty in the visible source although the
  # assertions below expect a PNG part -- presumably image markup is missing
  # here; confirm against version control.
  it "test image base64 image encoding" do
    begin
      simple_body = ''
      Html2Doc.process(html_input(simple_body),
                       filename: "spec/test", debug: true, imagedir: "spec")
      testdoc = File.read("spec/test.doc", encoding: "utf-8")
      # String#delete always returns a String; gsub! returns nil when there
      # is nothing to replace, which would break strict_decode64 below.
      base64_image = testdoc[/image\/png\n\n(.*?)\n\n----/m, 1].delete("\n")
      base64_image_basename = testdoc[%r{Content-ID: <([0-9a-z\-]+)\.png}m, 1]
      doc_bin_image = Base64.strict_decode64(base64_image)
      file_bin_image =
        File.binread("spec/test_files/#{base64_image_basename}.png")
      expect(doc_bin_image).to eq file_bin_image
    ensure
      # Clean up even when an expectation above fails, so leftover debug
      # artefacts cannot influence later runs.
      FileUtils.rm_rf %w[spec/test_files spec/test.doc spec/test.htm]
    end
  end
end