postproc_spec.rb in isodoc-1.6.4

- old
+ new

@@ -1,11 +1,11 @@
 # frozen_string_literal: true
 
 require "spec_helper"
 require "fileutils"
 
-OPTIONS = { wordstylesheet: "spec/assets/word.css",
+options = { wordstylesheet: "spec/assets/word.css",
             htmlstylesheet: "spec/assets/html.scss" }
 
 RSpec.describe IsoDoc do
   it "generates file based on string input" do
     FileUtils.rm_f "test.doc"
@@ -189,10 +189,11 @@
         htmlstylesheet: "spec/assets/html.scss",
         htmlstylesheet_override: "spec/assets/html_override.css",
         htmlcoverpage: "spec/assets/htmlcover.html",
         htmlintropage: "spec/assets/htmlintro.html",
         scripts: "spec/assets/scripts.html",
+        scripts_override: "spec/assets/scripts_override.html",
         i18nyaml: "spec/assets/i18n.yaml",
         ulstyle: "l1",
         olstyle: "l2" },
     ).convert("test", <<~"INPUT", false)
               <iso-standard xmlns="http://riboseinc.com/isoxml">
@@ -213,10 +214,11 @@
     expect(html).to match(/p\.note \{[^}]*?font-size: 28pt/m)
     expect(html).to match(/aside \{[^}]*?font-size: 27pt/m)
     expect(html).to match(/an empty html cover page/)
     expect(html).to match(/an empty html intro page/)
     expect(html).to match(/This is > a script/)
+    expect(html).to match(/This is > also a script/)
     expect(html).not_to match(/CDATA/)
     expect(html).to match(%r{Anta&#x16D;parolo</h1>})
     expect(html).to match(%r{html-override})
   end
 
@@ -955,11 +957,13 @@
   end
 
   it "moves images in HTML, using relative file location" do
     FileUtils.rm_f "spec/test.html"
     FileUtils.rm_rf "spec/test_htmlimages"
-    IsoDoc::HtmlConvert.new({ wordstylesheet: "assets/word.css", htmlstylesheet: "assets/html.scss" })
+    IsoDoc::HtmlConvert
+      .new(wordstylesheet: "assets/word.css",
+           htmlstylesheet: "assets/html.scss")
       .convert("spec/test", <<~"INPUT", false)
          <iso-standard xmlns="http://riboseinc.com/isoxml">
          <preface><foreword>
           <figure id="_">
           <name>Split-it-right sample divider</name>
@@ -994,11 +998,11 @@
   end
 
   it "encodes images in HTML as data URIs" do
     FileUtils.rm_f "test.html"
     FileUtils.rm_rf "test_htmlimages"
-    IsoDoc::HtmlConvert.new({ htmlstylesheet: "spec/assets/html.scss", datauriimage: true })
+    IsoDoc::HtmlConvert.new(htmlstylesheet: "spec/assets/html.scss", datauriimage: true)
       .convert("test", <<~"INPUT", false)
          <iso-standard xmlns="http://riboseinc.com/isoxml">
          <preface><foreword>
           <figure id="_">
           <name>Split-it-right sample divider</name>
@@ -1062,11 +1066,11 @@
   end
 
   it "processes IsoXML terms for HTML" do
     FileUtils.rm_f "test.html"
     FileUtils.rm_f "test.doc"
-    IsoDoc::HtmlConvert.new(OPTIONS)
+    IsoDoc::HtmlConvert.new(options)
       .convert("test", <<~"INPUT", false)
             <iso-standard xmlns="http://riboseinc.com/isoxml">
             <sections>
             <terms id="_terms_and_definitions" obligation="normative"><title>Terms and Definitions</title>
         <term id="paddy1"><name>1.1.</name><preferred>paddy</preferred>
@@ -1119,11 +1123,11 @@
   end
 
   it "processes empty term modifications" do
     FileUtils.rm_f "test.html"
     FileUtils.rm_f "test.doc"
-    IsoDoc::HtmlConvert.new(OPTIONS)
+    IsoDoc::HtmlConvert.new(options)
       .convert("test", <<~"INPUT", false)
             <iso-standard xmlns="http://riboseinc.com/isoxml">
             <sections>
             <terms id="_terms_and_definitions" obligation="normative"><title>Terms and Definitions</title>
         <term id="paddy1"><preferred>paddy</preferred>
@@ -1156,11 +1160,11 @@
   end
 
   it "creates continuation styles for multiparagraph list items in Word" do
     FileUtils.rm_f "test.doc"
     FileUtils.rm_f "test.html"
-    IsoDoc::WordConvert.new(OPTIONS)
+    IsoDoc::WordConvert.new(options)
       .convert("test", <<~"INPUT", false)
             <iso-standard xmlns="http://riboseinc.com/isoxml">
             <preface><foreword>
             <ul>
             <li><p>A</p>
@@ -1267,11 +1271,11 @@
   end
 
   it "does not lose HTML escapes in postprocessing" do
     FileUtils.rm_f "test.doc"
     FileUtils.rm_f "test.html"
-    IsoDoc::HtmlConvert.new(OPTIONS)
+    IsoDoc::HtmlConvert.new(options)
       .convert("test", <<~"INPUT", false)
             <iso-standard xmlns="http://riboseinc.com/isoxml">
             <preface><foreword>
             <sourcecode id="samplecode">
             <name>XML code</name>
@@ -1298,11 +1302,11 @@
   end
 
   it "does not lose HTML escapes in postprocessing (Word)" do
     FileUtils.rm_f "test.doc"
     FileUtils.rm_f "test.html"
-    IsoDoc::WordConvert.new(OPTIONS)
+    IsoDoc::WordConvert.new(options)
       .convert("test", <<~"INPUT", false)
             <iso-standard xmlns="http://riboseinc.com/isoxml">
             <preface><foreword>
             <sourcecode id="samplecode">
             <name>XML code</name>
@@ -1329,11 +1333,11 @@
   end
 
   it "propagates example style to paragraphs in postprocessing (Word)" do
     FileUtils.rm_f "test.doc"
     FileUtils.rm_f "test.html"
-    IsoDoc::WordConvert.new(OPTIONS).convert("test", <<~"INPUT", false)
+    IsoDoc::WordConvert.new(options).convert("test", <<~"INPUT", false)
           <iso-standard xmlns="http://riboseinc.com/isoxml">
           <preface><foreword>
           <example id="samplecode">
         <p>ABC</p>
       </example>
@@ -1360,11 +1364,11 @@
   end
 
   it "deals with image captions (Word)" do
     FileUtils.rm_f "test.doc"
     FileUtils.rm_f "test.html"
-    IsoDoc::WordConvert.new(OPTIONS)
+    IsoDoc::WordConvert.new(options)
       .convert("test", <<~"INPUT", false)
             <iso-standard xmlns="http://riboseinc.com/isoxml">
             <preface><foreword>
         <figure id="fig1">
           <name>Typical arrangement of the far-field scan set-up</name>
@@ -1394,11 +1398,11 @@
   end
 
   it "deals with empty table titles (Word)" do
     FileUtils.rm_f "test.doc"
     FileUtils.rm_f "test.html"
-    IsoDoc::WordConvert.new(OPTIONS)
+    IsoDoc::WordConvert.new(options)
       .convert("test", <<~"INPUT", false)
             <iso-standard xmlns="http://riboseinc.com/isoxml">
             <preface><foreword>
         <table id="_fe12b8f8-6858-4cd6-af7d-d4b6f3ebd1a7" unnumbered="true"><thead><tr>
               <td rowspan="2">
@@ -1455,26 +1459,25 @@
   end
 
   it "propagates alignment of table cells (Word)" do
     FileUtils.rm_f "test.doc"
     FileUtils.rm_f "test.html"
-    IsoDoc::WordConvert.new(OPTIONS)
+    IsoDoc::WordConvert.new(options)
       .convert("test", <<~"INPUT", false)
-        #{'                  '}
-                <iso-standard xmlns="http://riboseinc.com/isoxml">
-                          <preface><foreword>
-                      <table id="_fe12b8f8-6858-4cd6-af7d-d4b6f3ebd1a7" unnumbered="true"><thead><tr>
-                            <td rowspan="2" align="left">
-                              <p id="_c47d9b39-adb2-431d-9320-78cb148fdb56">Output wavelength</p>
-                              <p id="_c47d9b39-adb2-431d-9320-78cb148fdb57">Output wavelength</p>
-                            </td>
-                            <th colspan="3" align="right"><p id="_c47d9b39-adb2-431d-9320-78cb148fdb58">Predictive wavelengths</p></th>
-                          </tr>
-                          </thead>
-                          </table>
-                          </preface>
-                          </iso-standard>
+        <iso-standard xmlns="http://riboseinc.com/isoxml">
+                  <preface><foreword>
+              <table id="_fe12b8f8-6858-4cd6-af7d-d4b6f3ebd1a7" unnumbered="true"><thead><tr>
+                    <td rowspan="2" align="left">
+                      <p id="_c47d9b39-adb2-431d-9320-78cb148fdb56">Output wavelength</p>
+                      <p id="_c47d9b39-adb2-431d-9320-78cb148fdb57">Output wavelength</p>
+                    </td>
+                    <th colspan="3" align="right"><p id="_c47d9b39-adb2-431d-9320-78cb148fdb58">Predictive wavelengths</p></th>
+                  </tr>
+                  </thead>
+                  </table>
+                  </preface>
+                  </iso-standard>
       INPUT
     word = File.read("test.doc")
       .sub(/^.*<div class="WordSection2">/m, '<div class="WordSection2" xmlns:m="m">')
       .sub(%r{<p class="MsoNormal">\s*<br clear="all" class="section"/>\s*</p>\s*<div class="WordSection3">.*$}m, "")
       .sub(/src="[^"]+"/, 'src="_"')
@@ -1515,11 +1518,11 @@
          </div>
     OUTPUT
   end
 
   it "cleans up boilerplate" do
-    expect(xmlpp(IsoDoc::HtmlConvert.new({ wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.scss", filename: "test" }).html_preface(Nokogiri::XML(<<~INPUT)).to_xml).sub(/^.*<main/m, "<main").sub(%r{</main>.*$}m, "</main>")).to be_equivalent_to xmlpp(<<~"OUTPUT")
+    input = <<~INPUT
       <html>
       <head/>
       <body>
       <div class="main-section">
       <div id="boilerplate-copyright"> <h1>Copyright</h1> </div>
@@ -1533,10 +1536,11 @@
       <div id="boilerplate-copyright-destination"/>
       </div>
       </body>
       </html>
     INPUT
+    html = <<~OUTPUT
       <main class='main-section'>
         <button onclick='topFunction()' id='myBtn' title='Go to top'>Top</button>
         <hr/>
         <div id='boilerplate-feedback'>
           <h1 class='IntroTitle'>Feedback</h1>
@@ -1550,31 +1554,11 @@
         <div id='boilerplate-copyright'>
           <h1 class='IntroTitle'>Copyright</h1>
         </div>
       </main>
     OUTPUT
-  end
-
-  it "cleans up boilerplate (Word)" do
-    expect(xmlpp(IsoDoc::WordConvert.new({ wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.scss", filename: "test" }).word_cleanup(Nokogiri::XML(<<~INPUT)).to_xml).sub(/^.*<main/m, "<main").sub(%r{</main>.*$}m, "</main>")).to be_equivalent_to xmlpp(<<~"OUTPUT")
-      <html>
-      <head/>
-      <body>
-      <div class="main-section">
-      <div id="boilerplate-copyright"> <h1>Copyright</h1> </div>
-      <div id="boilerplate-license"> <h1>License</h1> </div>
-      <div id="boilerplate-legal"> <h1>Legal</h1> </div>
-      <div id="boilerplate-feedback"> <h1>Feedback</h1> </div>
-      <hr/>
-      <div id="boilerplate-feedback-destination"/>
-      <div id="boilerplate-legal-destination"/>
-      <div id="boilerplate-license-destination"/>
-      <div id="boilerplate-copyright-destination"/>
-      </div>
-      </body>
-      </html>
-    INPUT
+    doc = <<~OUTPUT
           <html>
         <head/>
         <body>
           <div class='main-section'>
             <hr/>
@@ -1592,10 +1576,22 @@
             </div>
           </div>
         </body>
       </html>
     OUTPUT
+    expect(xmlpp(IsoDoc::HtmlConvert
+      .new(wordstylesheet: "spec/assets/word.css",
+           htmlstylesheet: "spec/assets/html.scss", filename: "test")
+      .html_preface(Nokogiri::XML(input)).to_xml)
+      .sub(/^.*<main/m, "<main").sub(%r{</main>.*$}m, "</main>"))
+      .to be_equivalent_to xmlpp(html)
+    expect(xmlpp(IsoDoc::WordConvert
+      .new(wordstylesheet: "spec/assets/word.css",
+           htmlstylesheet: "spec/assets/html.scss", filename: "test")
+       .word_cleanup(Nokogiri::XML(input)).to_xml)
+       .sub(/^.*<main/m, "<main").sub(%r{</main>.*$}m, "</main>"))
+      .to be_equivalent_to xmlpp(doc)
   end
 
   it "deals with landscape and portrait pagebreaks (Word)" do
     FileUtils.rm_f "test.doc"
     IsoDoc::WordConvert.new(
@@ -1794,11 +1790,11 @@
                </body>
       OUTPUT
   end
 
   it "expands out nested tables in Word" do
-    expect(xmlpp(IsoDoc::WordConvert.new({ wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.scss", filename: "test" }).word_cleanup(Nokogiri::XML(<<~INPUT)).to_xml).sub(/^.*<main/m, "<main").sub(%r{</main>.*$}m, "</main>")).to be_equivalent_to xmlpp(<<~"OUTPUT")
+    input = <<~INPUT
           <html>
           <head/>
           <body>
           <div class="main-section">
           <table id="_7830dff8-419e-4b9e-85cf-a063689f44ca" class="recommend" style="border-collapse:collapse;border-spacing:0;"><thead><tr style="background:#A5A5A5;"><th style="vertical-align:top;" class="recommend" colspan="2"><p class="RecommendationTitle">Requirement 1:</p></th></tr></thead><tbody><tr><td style="vertical-align:top;" class="recommend" colspan="2"><p>requirement label</p></td></tr>
@@ -1816,10 +1812,11 @@
       </td></tr></tbody></table></tbody></table>
           </div>
           </body>
           </html>
     INPUT
+    output = <<~OUTPUT
           <html>
         <head/>
         <body>
           <div class='main-section'>
                <table id='_7830dff8-419e-4b9e-85cf-a063689f44ca' class='recommend' style='border-collapse:collapse;border-spacing:0;'>
@@ -1911,14 +1908,20 @@
            </table>
           </div>
         </body>
       </html>
     OUTPUT
+    expect(xmlpp(IsoDoc::WordConvert
+      .new(wordstylesheet: "spec/assets/word.css",
+           htmlstylesheet: "spec/assets/html.scss", filename: "test")
+      .word_cleanup(Nokogiri::XML(input)).to_xml)
+      .sub(/^.*<main/m, "<main").sub(%r{</main>.*$}m, "</main>"))
+      .to be_equivalent_to xmlpp(output)
   end
 
   it "allocate widths to tables (Word)" do
-    expect(xmlpp(IsoDoc::WordConvert.new({ wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.scss", filename: "test" }).word_cleanup(Nokogiri::XML(<<~INPUT)).to_xml).sub(/^.*<main/m, "<main").sub(%r{</main>.*$}m, "</main>")).to be_equivalent_to xmlpp(<<~"OUTPUT")
+    input = <<~INPUT
              <html xmlns:epub="http://www.idpf.org/2007/ops" lang="en">
                <head><style/></head>
                <body lang='EN-US' link='blue' vlink='#954F72'>
                  <div class='WordSection1'>
                    <p>&#160;</p>
@@ -2048,10 +2051,11 @@
                    </aside>
                  </div>
                </body>
              </html>
     INPUT
+    output = <<~OUTPUT
       <html xmlns:epub='http://www.idpf.org/2007/ops' lang='en'>
                <head>
                  <style/>
                </head>
                <body lang='EN-US' link='blue' vlink='#954F72'>
@@ -2183,7 +2187,64 @@
                    </aside>
                  </div>
                </body>
              </html>
     OUTPUT
+    expect(xmlpp(IsoDoc::WordConvert
+      .new(wordstylesheet: "spec/assets/word.css",
+           htmlstylesheet: "spec/assets/html.scss", filename: "test")
+      .word_cleanup(Nokogiri::XML(input)).to_xml)
+      .sub(/^.*<main/m, "<main").sub(%r{</main>.*$}m, "</main>"))
+      .to be_equivalent_to xmlpp(output)
+  end
+
+  it "generates bare HTML file" do
+    FileUtils.rm_f "test.html"
+    IsoDoc::HtmlConvert.new(
+      { bare: true,
+        htmlstylesheet: "spec/assets/html.scss",
+        filename: "test" },
+    ).convert("test", <<~"INPUT", false)
+            <iso-standard xmlns="http://riboseinc.com/isoxml">
+              <bibdata>
+              <title language="en">test</title>
+              </bibdata>
+              <boilerplate>
+              <feedback-statement>
+              <clause><title>I am boilerplate</title></clause>
+              </feedback-statement>
+              </boilerplate>
+          <preface><foreword>
+          <note>
+        <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
+      </note>
+          </foreword></preface>
+          </iso-standard>
+    INPUT
+    output = <<~OUTPUT
+      <body lang='en' xml:lang='en'>
+        <main class='main-section'>
+          <button onclick='topFunction()' id='myBtn' title='Go to top'>Top</button>
+          <br/>
+          <div>
+            <h1 class='ForewordTitle'>Foreword</h1>
+            <div class='Note'>
+              <p>
+                &#xA0; These results are based on a study carried out on three
+                different types of kernel.
+              </p>
+            </div>
+          </div>
+          <p class='zzSTDTitle1'>test</p>
+        </main>
+        <script/>
+      </body>
+    OUTPUT
+    expect(File.exist?("test.html")).to be true
+    html = File.read("test.html")
+      .sub(%r{^.*<body}m, "<body")
+      .sub(%r{</body>.*$}m, "</body>")
+      .gsub(%r{<script.+?</script>}m, "<script/>")
+      .sub(%r{(<script/>\s+)+}m, "<script/>")
+    expect(xmlpp(html)).to be_equivalent_to xmlpp(output)
   end
 end