postproc_spec.rb in isodoc-0.9.3

- old
+ new

@@ -1,11 +1,12 @@
 require "spec_helper"
+require "fileutils"
 
 RSpec.describe IsoDoc do
   it "generates file based on string input" do
-    system "rm -f test.doc"
-    system "rm -f test.html"
+    FileUtils.rm_f "test.doc"
+    FileUtils.rm_f "test.html"
     IsoDoc::HtmlConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", filename: "test"}).convert("test", <<~"INPUT", false)
         <iso-standard xmlns="http://riboseinc.com/isoxml">
         <bibdata>
         <title language="en">test</title>
         </bibdata>
@@ -23,12 +24,12 @@
     expect(html).to match(%r{cdnjs\.cloudflare\.com/ajax/libs/mathjax/2\.7\.1/MathJax\.js})
     expect(html).to match(/delimiters: \[\['\(#\(', '\)#\)'\]\]/)
   end
 
   it "generates HTML output docs with null configuration" do
-    system "rm -f test.doc"
-    system "rm -f test.html"
+    FileUtils.rm_f "test.doc"
+    FileUtils.rm_f "test.html"
     IsoDoc::HtmlConvert.new({wordstylesheet: "spec/assets/word.css"}).convert("test", <<~"INPUT", false)
         <iso-standard xmlns="http://riboseinc.com/isoxml">
     <preface><foreword>
     <note>
   <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
@@ -43,12 +44,12 @@
     expect(html).to match(%r{cdnjs\.cloudflare\.com/ajax/libs/mathjax/2\.7\.1/MathJax\.js})
     expect(html).to match(/delimiters: \[\['\(#\(', '\)#\)'\]\]/)
   end
 
   it "generates Word output docs with null configuration" do
-    system "rm -f test.doc"
-    system "rm -f test.html"
+    FileUtils.rm_f "test.doc"
+    FileUtils.rm_f "test.html"
     IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("test", <<~"INPUT", false)
         <iso-standard xmlns="http://riboseinc.com/isoxml">
     <preface><foreword>
     <note>
   <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
@@ -60,22 +61,22 @@
     word = File.read("test.doc")
     expect(word).to match(/one empty stylesheet/)
   end
 
   it "generates HTML output docs with null configuration from file" do
-    system "rm -f spec/assets/iso.doc"
-    system "rm -f spec/assets/iso.html"
+    FileUtils.rm_f "spec/assets/iso.doc"
+    FileUtils.rm_f "spec/assets/iso.html"
     IsoDoc::HtmlConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("spec/assets/iso.xml", nil, false)
     expect(File.exist?("spec/assets/iso.html")).to be true
     html = File.read("spec/assets/iso.html")
     expect(html).to match(/another empty stylesheet/)
     expect(html).to match(%r{https://use.fontawesome.com})
     expect(html).to match(%r{libs/jquery})
   end
 
-    it "generates Headless HTML output docs with null configuration from file" do
-    system "rm -f spec/assets/iso.html"
+  it "generates Headless HTML output docs with null configuration from file" do
+    FileUtils.rm_f "spec/assets/iso.html"
     IsoDoc::HeadlessHtmlConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("spec/assets/iso.xml", nil, false)
     expect(File.exist?("spec/assets/iso.headless.html")).to be true
     html = File.read("spec/assets/iso.headless.html")
     expect(html).not_to match(/another empty stylesheet/)
     expect(html).not_to match(%r{https://use.fontawesome.com})
@@ -85,27 +86,26 @@
     expect(html).not_to match(%r{<body})
     expect(html).to match(%r{<div})
   end
 
   it "generates Word output docs with null configuration from file" do
-    system "rm -f spec/assets/iso.doc"
+    FileUtils.rm_f "spec/assets/iso.doc"
     IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("spec/assets/iso.xml", nil, false)
     expect(File.exist?("spec/assets/iso.doc")).to be true
     word = File.read("spec/assets/iso.doc")
     expect(word).to match(/one empty stylesheet/)
   end
 
   it "generates PDF output docs with null configuration from file" do
-    system "rm -f spec/assets/iso.pdf"
-    #require "byebug"; byebug
+    FileUtils.rm_f "spec/assets/iso.pdf"
     IsoDoc::PdfConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("spec/assets/iso.xml", nil, false)
     expect(File.exist?("spec/assets/iso.pdf")).to be true
   end
 
   it "generates HTML output docs with complete configuration" do
-    system "rm -f test.doc"
-    system "rm -f test.html"
+    FileUtils.rm_f "test.doc"
+    FileUtils.rm_f "test.html"
     IsoDoc::HtmlConvert.new({bodyfont: "Zapf", htmlstylesheet: "spec/assets/html.css", htmlcoverpage: "spec/assets/htmlcover.html", htmlintropage: "spec/assets/htmlintro.html", scripts: "spec/assets/scripts.html", i18nyaml: "spec/assets/i18n.yaml", ulstyle: "l1", olstyle: "l2"}).convert("test", <<~"INPUT", false)
         <iso-standard xmlns="http://riboseinc.com/isoxml">
     <preface><foreword>
     <note>
   <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
@@ -122,12 +122,12 @@
     expect(html).not_to match(/CDATA/)
     expect(html).to match(%r{Enkonduko</h1>})
   end
 
   it "generates HTML output docs with default fonts" do
-    system "rm -f test.doc"
-    system "rm -f test.html"
+    FileUtils.rm_f "test.doc"
+    FileUtils.rm_f "test.html"
     IsoDoc::HtmlConvert.new({htmlstylesheet: "spec/assets/html.css", htmlcoverpage: "spec/assets/htmlcover.html", htmlintropage: "spec/assets/htmlintro.html", scripts: "spec/assets/scripts.html", i18nyaml: "spec/assets/i18n.yaml", ulstyle: "l1", olstyle: "l2"}).convert("test", <<~"INPUT", false)
         <iso-standard xmlns="http://riboseinc.com/isoxml">
     <preface><foreword>
     <note>
   <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
@@ -144,12 +144,12 @@
     expect(html).not_to match(/CDATA/)
     expect(html).to match(%r{Enkonduko</h1>})
   end
 
   it "generates Word output docs with complete configuration" do
-    system "rm -f test.doc"
-    system "rm -f test.html"
+    FileUtils.rm_f "test.doc"
+    FileUtils.rm_f "test.html"
     IsoDoc::WordConvert.new({bodyfont: "Zapf", wordstylesheet: "spec/assets/html.css", standardstylesheet: "spec/assets/std.css", header: "spec/assets/header.html", wordcoverpage: "spec/assets/wordcover.html", wordintropage: "spec/assets/wordintro.html", i18nyaml: "spec/assets/i18n.yaml", ulstyle: "l1", olstyle: "l2"}).convert("test", <<~"INPUT", false)
         <iso-standard xmlns="http://riboseinc.com/isoxml">
     <preface><foreword>
     <note>
   <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
@@ -167,12 +167,12 @@
     expect(word).to match(/an empty word intro page/)
     expect(word).to match(%r{Enkonduko</h1>})
   end
 
   it "generates Word output docs with default fonts" do
-    system "rm -f test.doc"
-    system "rm -f test.html"
+    FileUtils.rm_f "test.doc"
+    FileUtils.rm_f "test.html"
     IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/html.css", standardstylesheet: "spec/assets/std.css", header: "spec/assets/header.html", wordcoverpage: "spec/assets/wordcover.html", wordintropage: "spec/assets/wordintro.html", i18nyaml: "spec/assets/i18n.yaml", ulstyle: "l1", olstyle: "l2"}).convert("test", <<~"INPUT", false)
         <iso-standard xmlns="http://riboseinc.com/isoxml">
     <preface><foreword>
     <note>
   <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
@@ -190,12 +190,12 @@
     expect(word).to match(/an empty word intro page/)
     expect(word).to match(%r{Enkonduko</h1>})
   end
 
   it "converts definition lists to tables for Word" do
-    system "rm -f test.doc"
-    system "rm -f test.html"
+    FileUtils.rm_f "test.doc"
+    FileUtils.rm_f "test.html"
     IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("test", <<~"INPUT", false)
      <iso-standard xmlns="http://riboseinc.com/isoxml">
     <preface><foreword>
     <dl>
     <dt>Term</dt>
@@ -232,12 +232,12 @@
               </div>
     OUTPUT
   end
 
   it "converts annex subheadings to h2Annex class for Word" do
-    system "rm -f test.doc"
-    system "rm -f test.html"
+    FileUtils.rm_f "test.doc"
+    FileUtils.rm_f "test.html"
     IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("test", <<~"INPUT", false)
     <iso-standard xmlns="http://riboseinc.com/isoxml">
     <annex id="P" inline-header="false" obligation="normative">
          <title>Annex</title>
          <clause id="Q" inline-header="false" obligation="normative">
@@ -260,13 +260,56 @@
                </div>
              </div>
     OUTPUT
   end
 
+  it "inserts default paragraph between two tables for Word" do 
+    FileUtils.rm_f "test.doc"
+    FileUtils.rm_f "test.html"
+    IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("test", <<~"INPUT", false)
+    <iso-standard xmlns="http://riboseinc.com/isoxml">
+    <annex id="P" inline-header="false" obligation="normative">
+    <example id="_63112cbc-cde0-435f-9553-e0b8c4f5851c">
+  <p id="_158d4efa-b1c9-4aec-b325-756de8e4c968">'1M', '01M', and '0001M' all describe the calendar month January.</p>
+</example>
+<example id="_63112cbc-cde0-435f-9553-e0b8c4f5851d">
+  <p id="_158d4efa-b1c9-4aec-b325-756de8e4c969">'2M', '02M', and '0002M' all describe the calendar month February.</p>
+</example>
+    </annex>
+    </iso-standard>
+    INPUT
+    word = File.read("test.doc").sub(/^.*<div class="WordSection3">/m, '<div class="WordSection3">').
+      sub(%r{<div style="mso-element:footnote-list"/>.*$}m, "")
+    expect(word).to be_equivalent_to <<~"OUTPUT"
+    <div class="WordSection3">
+             <p class="zzSTDTitle1"></p>
+             <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
+             <div class="Section3"><a name="P" id="P"></a>
+               <table class="example"><a name="_63112cbc-cde0-435f-9553-e0b8c4f5851c" id="_63112cbc-cde0-435f-9553-e0b8c4f5851c"></a>
+                 <tr>
+                   <td valign="top" class="example_label" style="width:82.8pt;">EXAMPLE  1</td>
+                   <td valign="top" class="example">
+         <p class="example"><a name="_158d4efa-b1c9-4aec-b325-756de8e4c968" id="_158d4efa-b1c9-4aec-b325-756de8e4c968"></a>'1M', '01M', and '0001M' all describe the calendar month January.</p>
+       </td>
+                 </tr>
+               </table><p style="margin-top:0cm;margin-right:0cm;margin-bottom:0cm;margin-left:0.0pt;margin-bottom:.0001pt" class="MsoNormal"><span lang="EN-GB" style="display:none;mso-hide:all" xml:lang="EN-GB">&#xA0;</span></p>
+               <table class="example"><a name="_63112cbc-cde0-435f-9553-e0b8c4f5851d" id="_63112cbc-cde0-435f-9553-e0b8c4f5851d"></a>
+                 <tr>
+                   <td valign="top" class="example_label" style="width:82.8pt;">EXAMPLE  2</td>
+                   <td valign="top" class="example">
+         <p class="example"><a name="_158d4efa-b1c9-4aec-b325-756de8e4c969" id="_158d4efa-b1c9-4aec-b325-756de8e4c969"></a>'2M', '02M', and '0002M' all describe the calendar month February.</p>
+       </td>
+                 </tr>
+               </table>
+             </div>
+           </div>
+    OUTPUT
+  end
+
   it "populates Word template with terms reference labels" do
-    system "rm -f test.doc"
-    system "rm -f test.html"
+    FileUtils.rm_f "test.doc"
+    FileUtils.rm_f "test.html"
     IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("test", <<~"INPUT", false)
         <iso-standard xmlns="http://riboseinc.com/isoxml">
     <sections>
     <terms id="_terms_and_definitions" obligation="normative"><title>Terms and Definitions</title>
 
@@ -292,25 +335,25 @@
                <div><a name="_terms_and_definitions" id="_terms_and_definitions"></a><h1>1.<span style="mso-tab-count:1">&#xA0; </span>Terms and definitions</h1><p class="MsoNormal">For the purposes of this document,
            the following terms and definitions apply.</p>
        <p class="MsoNormal">ISO and IEC maintain terminological databases for use in
        standardization at the following addresses:</p>
 
-       <ul>
-       <li class="MsoNormal"> <p class="MsoNormal">ISO Online browsing platform: available at
-         <a href="http://www.iso.org/obp">http://www.iso.org/obp</a></p> </li>
-       <li class="MsoNormal"> <p class="MsoNormal">IEC Electropedia: available at
+       <p class="MsoListParagraphCxSpFirst"> ISO Online browsing platform: available at
+        <a href="http://www.iso.org/obp">http://www.iso.org/obp</a> </p>
+        <p class="MsoListParagraphCxSpLast"> IEC Electropedia: available at
+
          <a href="http://www.electropedia.org">http://www.electropedia.org</a>
-       </p> </li> </ul>
+       </p> 
        <p class="TermNum"><a name="paddy1" id="paddy1"></a>1.1</p><p class="Terms" style="text-align:left;">paddy</p>
        <p class="MsoNormal"><a name="_eb29b35e-123e-4d1c-b50b-2714d41e747f" id="_eb29b35e-123e-4d1c-b50b-2714d41e747f"></a>rice retaining its husk after threshing</p>
        <p class="MsoNormal">[SOURCE: <a href="#ISO7301">ISO 7301:2011, Clause 3.1</a>, modified &mdash; The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here]</p></div>
              </div>
     OUTPUT
   end
 
   it "populates Word header" do
-    system "rm -f test.doc"
+    FileUtils.rm_f "test.doc"
     IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", header: "spec/assets/header.html"}).convert("test", <<~"INPUT", false)
         <iso-standard xmlns="http://riboseinc.com/isoxml">
                <bibdata type="article">
                         <docidentifier>
            <project-number part="1">1000</project-number>
@@ -332,11 +375,11 @@
 
     OUTPUT
   end
 
   it "populates Word ToC" do
-    system "rm -f test.doc"
+    FileUtils.rm_f "test.doc"
     IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", wordintropage: "spec/assets/wordintro.html"}).convert("test", <<~"INPUT", false)
         <iso-standard xmlns="http://riboseinc.com/isoxml">
         <sections>
                <clause inline-header="false" obligation="normative"><title>Clause 4</title><clause id="N" inline-header="false" obligation="normative">
 
@@ -411,11 +454,11 @@
              </div>
     OUTPUT
   end
 
   it "reorders footnote numbers in HTML" do
-    system "rm -f test.html"
+    FileUtils.rm_f "test.html"
     IsoDoc::HtmlConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", wordintropage: "spec/assets/wordintro.html"}).convert("test", <<~"INPUT", false)
         <iso-standard xmlns="http://riboseinc.com/isoxml">
         <sections>
                <clause inline-header="false" obligation="normative"><title>Clause 4</title><fn reference="3">
   <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">This is a footnote.</p>
@@ -465,12 +508,12 @@
            </main>
     OUTPUT
   end
 
   it "moves images in HTML" do
-    system "rm -f test.html"
-    system "rm -rf test_images"
+    FileUtils.rm_f "test.html"
+    FileUtils.rm_rf "test_images"
     IsoDoc::HtmlConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("test", <<~"INPUT", false)
         <iso-standard xmlns="http://riboseinc.com/isoxml">
         <preface><foreword>
          <figure id="_">
          <name>Split-it-right sample divider</name>
@@ -500,12 +543,12 @@
     OUTPUT
 
   end
 
   it "encodes images in HTML as data URIs" do
-    system "rm -f test.html"
-    system "rm -rf test_images"
+    FileUtils.rm_f "test.html"
+    FileUtils.rm_rf "test_images"
     IsoDoc::HtmlConvert.new({htmlstylesheet: "spec/assets/html.css", datauriimage: true}).convert("test", <<~"INPUT", false)
         <iso-standard xmlns="http://riboseinc.com/isoxml">
         <preface><foreword>
          <figure id="_">
          <name>Split-it-right sample divider</name>
@@ -530,11 +573,11 @@
     OUTPUT
 
   end
 
   it "processes IsoXML terms for HTML" do
-    system "rm -f test.doc"
-    system "rm -f test.html"
+    FileUtils.rm_f "test.html"
+    FileUtils.rm_f "test.doc"
     IsoDoc::HtmlConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("test", <<~"INPUT", false)
     <iso-standard xmlns="http://riboseinc.com/isoxml">
     <sections>
     <terms id="_terms_and_definitions" obligation="normative"><title>Terms and Definitions</title>