# frozen_string_literal: true require 'spec_helper' require 'fileutils' RSpec.describe IsoDoc do it 'generates file based on string input' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss', filename: 'test').convert('test', <<~"INPUT", false) test

These results are based on a study carried out on three different types of kernel.

INPUT expect(File.exist?('test.html')).to be true html = File.read('test.html') expect(html).to match(%r{test}) expect(html).to match(/another empty stylesheet/) expect(html).to match(%r{cdnjs\.cloudflare\.com/ajax/libs/mathjax/}) expect(html).to match(/delimiters: \[\['$#\(', '$#\)'\]\]/) end it 'ignores Liquid markup in the document body' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css').convert('test', <<~"INPUT", false) test

{% elif %}These results are based on a study carried out on three different types of kernel.

INPUT expect(File.exist?('test.html')).to be true html = File.read('test.html') end it 'ignores Liquid markup in the document body (Word)' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css').convert('test', <<~"INPUT", false) test

{% elif %}These results are based on a study carried out on three different types of kernel.

INPUT expect(File.exist?('test.doc')).to be true html = File.read('test.doc') end it 'generates HTML output docs with null configuration' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css').convert('test', <<~"INPUT", false)

These results are based on a study carried out on three different types of kernel.

INPUT expect(File.exist?('test.html')).to be true html = File.read('test.html') expect(html).not_to match(%r{test}) expect(html).not_to match(/another empty stylesheet/) expect(html).to match(%r{cdnjs\.cloudflare\.com/ajax/libs/mathjax/}) expect(html).to match(/delimiters: \[\['$#\(', '$#\)'\]\]/) end it 'generates Word output docs with null configuration' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)

These results are based on a study carried out on three different types of kernel.

INPUT expect(File.exist?('test.doc')).to be true word = File.read('test.doc') expect(word).to match(/one empty stylesheet/) expect(word).to match(/div\.table_container/) end it 'generates HTML output docs with null configuration from file' do FileUtils.rm_f 'spec/assets/iso.doc' FileUtils.rm_f 'spec/assets/iso.html' IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('spec/assets/iso.xml', nil, false) expect(File.exist?('spec/assets/iso.html')).to be true html = File.read('spec/assets/iso.html') expect(html).to match(/another empty stylesheet/) expect(html).to match(%r{https://use.fontawesome.com}) expect(html).to match(%r{libs/jquery}) end it 'generates Headless HTML output docs with null configuration from file' do FileUtils.rm_f 'spec/assets/iso.html' IsoDoc::HeadlessHtmlConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('spec/assets/iso.xml', nil, false) expect(File.exist?('spec/assets/iso.headless.html')).to be true html = File.read('spec/assets/iso.headless.html') expect(html).not_to match(/another empty stylesheet/) expect(html).not_to match(%r{https://use.fontawesome.com}) expect(html).not_to match(%r{libs/jquery}) expect(html).not_to match(%r{

These results are based on a study carried out on three different types of kernel.

INPUT html = File.read('test.html') expect(html).to match(/another empty stylesheet/) expect(html).to match(/font-family: Zapf/) expect(html).to match(/an empty html cover page/) expect(html).to match(/an empty html intro page/) expect(html).to match(/This is > a script/) expect(html).not_to match(/CDATA/) expect(html).to match(%r{Enkonduko}) end it 'generates HTML output docs with default fonts' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::HtmlConvert.new(htmlstylesheet: 'spec/assets/html.scss', htmlcoverpage: 'spec/assets/htmlcover.html', htmlintropage: 'spec/assets/htmlintro.html', scripts: 'spec/assets/scripts.html', i18nyaml: 'spec/assets/i18n.yaml', ulstyle: 'l1', olstyle: 'l2').convert('test', <<~"INPUT", false)

These results are based on a study carried out on three different types of kernel.

INPUT html = File.read('test.html') expect(html).to match(/another empty stylesheet/) expect(html).to match(/font-family: Arial/) expect(html).to match(/an empty html cover page/) expect(html).to match(/an empty html intro page/) expect(html).to match(/This is > a script/) expect(html).not_to match(/CDATA/) expect(html).to match(%r{Enkonduko}) end it 'generates Word output docs with complete configuration' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::WordConvert.new(bodyfont: 'Zapf', wordstylesheet: 'spec/assets/html.scss', standardstylesheet: 'spec/assets/std.css', header: 'spec/assets/header.html', wordcoverpage: 'spec/assets/wordcover.html', wordintropage: 'spec/assets/wordintro.html', i18nyaml: 'spec/assets/i18n.yaml', ulstyle: 'l1', olstyle: 'l2').convert('test', <<~"INPUT", false)

These results are based on a study carried out on three different types of kernel.

INPUT word = File.read('test.doc') expect(word).to match(/another empty stylesheet/) expect(word).to match(/font-family: Zapf/) expect(word).to match(/a third empty stylesheet/) # expect(word).to match(/test<\/title>/) expect(word).to match(/test_files\/header.html/) expect(word).to match(/an empty word cover page/) expect(word).to match(/an empty word intro page/) expect(word).to match(%r{Enkonduko</h1>}) end it 'generates Word output docs with default fonts' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/html.scss', standardstylesheet: 'spec/assets/std.css', header: 'spec/assets/header.html', wordcoverpage: 'spec/assets/wordcover.html', wordintropage: 'spec/assets/wordintro.html', i18nyaml: 'spec/assets/i18n.yaml', ulstyle: 'l1', olstyle: 'l2').convert('test', <<~"INPUT", false) <iso-standard xmlns="http://riboseinc.com/isoxml"> <preface><foreword> <note> <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p> </note> </foreword></preface> </iso-standard> INPUT word = File.read('test.doc') expect(word).to match(/another empty stylesheet/) expect(word).to match(/font-family: Arial/) expect(word).to match(/a third empty stylesheet/) # expect(word).to match(/<title>test<\/title>/) expect(word).to match(/test_files\/header.html/) expect(word).to match(/an empty word cover page/) expect(word).to match(/an empty word intro page/) expect(word).to match(%r{Enkonduko</h1>}) end it 'converts definition lists to tables for Word' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false) <iso-standard xmlns="http://riboseinc.com/isoxml"> <preface><foreword> <dl> <dt>Term</dt> <dd>Definition</dd> <dt>Term 2</dt> <dd>Definition 2</dd> </dl> </foreword></preface> </iso-standard> INPUT word = File.read('test.doc').sub(/^.*<div class="WordSection2">/m, '<div class="WordSection2">') .sub(%r{<p class="MsoNormal">\s*<br clear="all" class="section"/>\s*</p>\s*<div class="WordSection3">.*$}m, '') expect(xmlpp(word)).to be_equivalent_to xmlpp(<<~"OUTPUT") <div class="WordSection2"> <p class="MsoNormal"><br clear="all" style="mso-special-character:line-break;page-break-before:always"/></p> <div> <h1 class="ForewordTitle">Foreword</h1> <table class="dl"> <tr> <td valign="top" align="left"> <p align="left" style="margin-left:0pt;text-align:left;" class="MsoNormal">Term</p> </td> <td valign="top">Definition</td> </tr> <tr> <td valign="top" align="left"> <p align="left" style="margin-left:0pt;text-align:left;" class="MsoNormal">Term 2</p> </td> <td valign="top">Definition 2</td> </tr> </table> </div> <p class="MsoNormal"> </p> </div> OUTPUT end it 'populates Word template with terms reference labels' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false) <iso-standard xmlns="http://riboseinc.com/isoxml"> <sections> <terms id="_terms_and_definitions" obligation="normative"><title>Terms and Definitions paddy

rice retaining its husk after threshing

3.1

The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here

INPUT word = File.read('test.doc').sub(/^.*

/m, '

') .sub(%r{

.*$}m, '') expect(xmlpp(word)).to be_equivalent_to xmlpp(<<~"OUTPUT")

1. Terms and Definitions

1.1.

paddy

rice retaining its husk after threshing

[SOURCE: ISO 7301:2011, Clause 3.1, modified — The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here]

OUTPUT end it 'populates Word header' do FileUtils.rm_f 'test.doc' IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss', header: 'spec/assets/header.html').convert('test', <<~"INPUT", false)

1000

INPUT word = File.read('test.doc').sub(%r{^.*Content-Location: file:///C:/Doc/test_files/header.html}m, 'Content-Location: file:///C:/Doc/test_files/header.html') .sub(/------=_NextPart.*$/m, '') expect(word).to be_equivalent_to <<~"OUTPUT" Content-Location: file:///C:/Doc/test_files/header.html Content-Transfer-Encoding: base64 Content-Type: text/html charset="utf-8" Ci8qIGFuIGVtcHR5IGhlYWRlciAqLwoKU1RBUlQgRE9DIElEOiAKICAgICAgICAgICAxMDAwCiAg ICAgICAgIDogRU5EIERPQyBJRAoKRklMRU5BTUU6IHRlc3QKCg== OUTPUT end it 'populates Word ToC' do FileUtils.rm_f 'test.doc' IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss', wordintropage: 'spec/assets/wordintro.html').convert('test', <<~"INPUT", false) Clause 4 Introduction<bookmark id="Q"/> to this<fn reference="1"> <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p> </fn> Clause 4.2

Formerly denoted as 15 % (m/m).

Clause 4.2.1 INPUT word = File.read('test.doc').sub(/^.*

/m, '

') .sub(%r{

\s*
\s*

\s*

.*$}m, '') expect(xmlpp(word.gsub(/_Toc\d\d+/, '_Toc'))).to be_equivalent_to xmlpp(<<~'OUTPUT')

/* an empty word intro page */

TOC \o "1-2" \h \z \u 1. Clause 4 . PAGEREF _Toc \h 1

1.1. Introduction to this . PAGEREF _Toc \h 1

1.2. Clause 4.2 . PAGEREF _Toc \h 1

OUTPUT end it 'populates Word ToC with custom levels' do FileUtils.rm_f 'test.doc' IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss', wordintropage: 'spec/assets/wordintro.html', doctoclevels: 3).convert('test', <<~"INPUT", false) Clause 4 Introduction<bookmark id="Q"/> to this<fn reference="1"> <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p> </fn> Clause 4.2

Formerly denoted as 15 % (m/m).

Clause 4.2.1 INPUT word = File.read('test.doc').sub(/^.*

/m, '

') .sub(%r{

\s*
\s*

\s*

.*$}m, '') expect(xmlpp(word.gsub(/_Toc\d\d+/, '_Toc'))).to be_equivalent_to xmlpp(<<~'OUTPUT')

/* an empty word intro page */

TOC \o "1-3" \h \z \u 1. Clause 4 . PAGEREF _Toc \h 1

1.1. Introduction to this . PAGEREF _Toc \h 1

1.2. Clause 4.2 . PAGEREF _Toc \h 1

1.2.1. Clause 4.2.1 . PAGEREF _Toc \h 1

OUTPUT end it 'generates HTML output with custom ToC levels function' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::HtmlConvert.new(htmltoclevels: 3).convert('test', <<~"INPUT", false)

These results are based on a study carried out on three different types of kernel.

INPUT html = File.read('test.html') toclevel = <<~"TOCLEVEL" function toclevel() { return "h1:not(:empty):not(.TermNum):not(.noTOC),h2:not(:empty):not(.TermNum):not(.noTOC),h3:not(:empty):not(.TermNum):not(.noTOC)";} TOCLEVEL expect(html).to include toclevel end it 'reorders footnote numbers in HTML' do FileUtils.rm_f 'test.html' IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss', wordintropage: 'spec/assets/wordintro.html').convert('test', <<~"INPUT", false) Clause 4

This is a footnote.

Introduction to this<fn reference="2"> <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p> </fn> Clause 4.2

Formerly denoted as 15 % (m/m).

INPUT html = File.read('test.html').sub(/^.*

/m, '

') .sub(%r{

.*$}m, '

') expect(xmlpp(html)).to be_equivalent_to xmlpp(<<~"OUTPUT")

1. Clause 4

1.1. Introduction to this²

1.2. Clause 4.2

A²

OUTPUT end it 'moves images in HTML' do FileUtils.rm_f 'test.html' FileUtils.rm_rf 'test_htmlimages' IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)

INPUT html = File.read('test.html').sub(/^.*

/m, '

') .sub(%r{

.*$}m, '

') expect(`ls test_htmlimages`).to match(/\.png$/) expect(xmlpp(html.gsub(/\/[0-9a-f-]+\.png/, '/_.png'))).to be_equivalent_to xmlpp(<<~"OUTPUT")

Foreword

Figure 1 — Split-it-right sample divider

OUTPUT end it 'moves images in HTML with no file suffix' do FileUtils.rm_f 'test.html' FileUtils.rm_rf 'test_htmlimages' IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)

INPUT html = File.read('test.html').sub(/^.*

/m, '

') .sub(%r{

.*$}m, '

') expect(`ls test_htmlimages`).to match(/\.png$/) expect(xmlpp(html.gsub(/\/[0-9a-f-]+\.png/, '/_.png'))).to be_equivalent_to xmlpp(<<~"OUTPUT")

Foreword

Figure 1 — Split-it-right sample divider

OUTPUT end it 'moves images in HTML, using relative file location' do FileUtils.rm_f 'spec/test.html' FileUtils.rm_rf 'spec/test_htmlimages' IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('spec/test', <<~"INPUT", false)

INPUT html = File.read('spec/test.html').sub(/^.*

/m, '

') .sub(%r{

.*$}m, '

') expect(`ls test_htmlimages`).to match(/\.png$/) expect(xmlpp(html.gsub(/\/[0-9a-f-]+\.png/, '/_.png'))).to be_equivalent_to xmlpp(<<~"OUTPUT")

Foreword

Figure 1 — Split-it-right sample divider

OUTPUT end it 'encodes images in HTML as data URIs' do FileUtils.rm_f 'test.html' FileUtils.rm_rf 'test_htmlimages' IsoDoc::HtmlConvert.new(htmlstylesheet: 'spec/assets/html.scss', datauriimage: true).convert('test', <<~"INPUT", false)

INPUT html = File.read('test.html').sub(/^.*

/m, '

') .sub(%r{

.*$}m, '

') expect(xmlpp(html.gsub(%r{src="data:image/png;base64,[^"]+"}, %{src="data:image/png;base64,_"}))).to be_equivalent_to xmlpp(<<~"OUTPUT")

Foreword

Figure 1 — Split-it-right sample divider

OUTPUT end it 'encodes images in HTML as data URIs, using relative file location' do FileUtils.rm_f 'spec/test.html' FileUtils.rm_rf 'spec/test_htmlimages' IsoDoc::HtmlConvert.new(htmlstylesheet: 'spec/assets/html.scss', datauriimage: true).convert('spec/test', <<~"INPUT", false)

INPUT html = File.read('spec/test.html').sub(/^.*

/m, '

') .sub(%r{

.*$}m, '

') expect(xmlpp(html.gsub(%r{src="data:image/png;base64,[^"]+"}, %{src="data:image/png;base64,_"}))).to be_equivalent_to xmlpp(<<~"OUTPUT")

Foreword

Figure 1 — Split-it-right sample divider

OUTPUT end it 'processes IsoXML terms for HTML' do FileUtils.rm_f 'test.html' FileUtils.rm_f 'test.doc' IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false) Terms and Definitions paddy rice

rice retaining its husk after threshing

Foreign seeds, husks, bran, sand, dust.

3.1

The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here

paddypaddy rice rough rice cargo rice

rice retaining its husk after threshing

The starch of waxy rice consists almost entirely of amylopectin. The kernels have a tendency to stick together after cooking.

3.1 INPUT expect(File.exist?('test.html')).to be true html = File.read('test.html') expect(html).to match(%r{

1\.1\.

}) expect(html).to match(%r{

1\.2\.

}) end it 'processes empty term modifications' do FileUtils.rm_f 'test.html' FileUtils.rm_f 'test.doc' IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false) Terms and Definitions paddy rice

rice retaining its husk after threshing

Foreign seeds, husks, bran, sand, dust.

3.1

INPUT expect(File.exist?('test.html')).to be true html = File.read('test.html') expect(html).to include '[SOURCE: ISO 7301:2011, Clause 3.1, modified]' end it 'creates continuation styles for multiparagraph list items in Word' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)

A

B
1. C
  
  D
  E

A1

B1
- C1
  D1

INPUT word = File.read('test.doc').sub(/^.*

/m, '

') .sub(%r{

\s*
\s*

\s*

.*$}m, '') expect(xmlpp(word)).to be_equivalent_to xmlpp(<<~"OUTPUT")

Foreword

D1 (1)

OUTPUT end it 'does not lose HTML escapes in postprocessing' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false) XML code <xml> & INPUT html = File.read('test.html').sub(/^.*

/m, '

') .sub(%r{

.*$}m, '

') expect(xmlpp(html)).to be_equivalent_to xmlpp(<<~"OUTPUT")

Foreword


    
  <xml> &

Figure 1 — XML code

OUTPUT end it 'does not lose HTML escapes in postprocessing (Word)' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false) XML code <xml> & INPUT word = File.read('test.doc').sub(/^.*

/m, '

') .sub(%r{

\s*
\s*

\s*

.*$}m, '') expect(xmlpp(word)).to be_equivalent_to xmlpp(<<~"OUTPUT")

Foreword

<xml> &

Figure 1 — XML code

OUTPUT end it 'propagates example style to paragraphs in postprocessing (Word)' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)

ABC

INPUT word = File.read('test.doc').sub(/^.*

/m, '

') .sub(%r{

\s*
\s*

\s*

.*$}m, '') expect(xmlpp(word)).to be_equivalent_to xmlpp(<<~"OUTPUT")

Foreword

EXAMPLE

ABC

OUTPUT end it 'deals with image captions (Word)' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)

INPUT word = File.read('test.doc').sub(/^.*

/m, '

') .sub(%r{

\s*
\s*

\s*

.*$}m, '') .sub(/src="[^"]+"/, 'src="_"') expect(xmlpp(word)).to be_equivalent_to xmlpp(<<~"OUTPUT")

Foreword

Figure 1 — Typical arrangement of the far-field scan set-up

OUTPUT end it 'deals with empty table titles (Word)' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)

Output wavelength $(μ m)$	Predictive wavelengths

INPUT word = File.read('test.doc').sub(/^.*

/m, '

') .sub(%r{

\s*
\s*

\s*

.*$}m, '') .sub(/src="[^"]+"/, 'src="_"') expect(xmlpp(word)).to be_equivalent_to xmlpp(<<~"OUTPUT")

Foreword

Output wavelength (μm)	Predictive wavelengths

OUTPUT end it 'propagates alignment of table cells (Word)' do FileUtils.rm_f 'test.doc' FileUtils.rm_f 'test.html' IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)