# frozen_string_literal: true
require 'spec_helper'
require 'fileutils'
RSpec.describe IsoDoc do
# Converts an inline document string with Word and HTML stylesheets configured
# and checks the contents of the generated test.html.
it 'generates file based on string input' do
  FileUtils.rm_f %w[test.doc test.html]
  source = <<~"INPUT"
    test
    These results are based on a study carried out on three different types of kernel.
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    wordstylesheet: 'spec/assets/word.css',
    htmlstylesheet: 'spec/assets/html.scss',
    filename: 'test'
  )
  converter.convert('test', source, false)

  expect(File.exist?('test.html')).to be true
  page = File.read('test.html')
  # The line break inside the next regexp literal is part of the pattern, so
  # its continuation line must stay flush left.
  # NOTE(review): this pattern looks like it lost surrounding markup — confirm.
  expect(page).to match(%r{
test})
  expect(page).to match(/another empty stylesheet/)
  expect(page).to match(%r{cdnjs\.cloudflare\.com/ajax/libs/mathjax/})
  expect(page).to match(/delimiters: \[\['\(#\(', '\)#\)'\]\]/)
end
# Smoke test: a document body containing the Liquid tag `{% elif %}` must
# convert to HTML without raising, and test.html must be written.
it 'ignores Liquid markup in the document body' do
  FileUtils.rm_f 'test.doc'
  FileUtils.rm_f 'test.html'
  IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css').convert('test', <<~"INPUT", false)
    test
    {% elif %}These results are based on a study carried out on three different types of kernel.
  INPUT
  # Only successful generation is asserted; the previous `html = File.read(...)`
  # assigned a local that was never used, so it has been dropped.
  expect(File.exist?('test.html')).to be true
end
# Smoke test: a document body containing the Liquid tag `{% elif %}` must
# convert to Word without raising, and test.doc must be written.
it 'ignores Liquid markup in the document body (Word)' do
  FileUtils.rm_f 'test.doc'
  FileUtils.rm_f 'test.html'
  IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css').convert('test', <<~"INPUT", false)
    test
    {% elif %}These results are based on a study carried out on three different types of kernel.
  INPUT
  # Only successful generation is asserted; the previous `html = File.read(...)`
  # assigned a local that was never used, so it has been dropped.
  expect(File.exist?('test.doc')).to be true
end
# HTML conversion with only a Word stylesheet configured: test.html must exist,
# must not match "test" or "another empty stylesheet", and must still carry the
# MathJax CDN reference and delimiter configuration.
it 'generates HTML output docs with null configuration' do
  FileUtils.rm_f %w[test.doc test.html]
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
  INPUT
  converter = IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css')
  converter.convert('test', source, false)

  expect(File.exist?('test.html')).to be true
  page = File.read('test.html')
  expect(page).not_to match(%r{test})
  expect(page).not_to match(/another empty stylesheet/)
  expect(page).to match(%r{cdnjs\.cloudflare\.com/ajax/libs/mathjax/})
  expect(page).to match(/delimiters: \[\['\(#\(', '\)#\)'\]\]/)
end
# Word conversion with Word and HTML stylesheets configured: test.doc must
# exist and contain the two stylesheet markers checked below.
it 'generates Word output docs with null configuration' do
  FileUtils.rm_f %w[test.doc test.html]
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
  INPUT
  converter = IsoDoc::WordConvert.new(
    wordstylesheet: 'spec/assets/word.css',
    htmlstylesheet: 'spec/assets/html.scss'
  )
  converter.convert('test', source, false)

  expect(File.exist?('test.doc')).to be true
  document = File.read('test.doc')
  expect(document).to match(/one empty stylesheet/)
  expect(document).to match(/div\.table_container/)
end
# Converts spec/assets/iso.xml from disk (no inline document string) and checks
# that the generated spec/assets/iso.html carries the stylesheet marker, the
# Font Awesome URL and a jQuery library path.
it 'generates HTML output docs with null configuration from file' do
  FileUtils.rm_f 'spec/assets/iso.doc'
  FileUtils.rm_f 'spec/assets/iso.html'
  IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('spec/assets/iso.xml', nil, false)
  expect(File.exist?('spec/assets/iso.html')).to be true
  html = File.read('spec/assets/iso.html')
  expect(html).to match(/another empty stylesheet/)
  # Compared as a literal substring: in the former regexp the unescaped dots
  # matched any character, so look-alike hosts would also have passed.
  expect(html).to include('https://use.fontawesome.com')
  expect(html).to include('libs/jquery')
end
# Headless HTML conversion of spec/assets/iso.xml: expects
# spec/assets/iso.headless.html to exist and NOT to match the stylesheet
# marker, the Font Awesome URL or the jQuery path.
# NOTE(review): the setup removes spec/assets/iso.html, not iso.headless.html,
# so a stale headless file could satisfy the existence check — confirm intent.
# NOTE(review): the final `not_to match(%r{` is never closed in this copy. The
# lines after it (document text, an INPUT terminator, expectations on
# test.html) look like the tail of a different example whose `it` header and
# converter call are missing. Restore the lost lines before relying on this.
it 'generates Headless HTML output docs with null configuration from file' do
FileUtils.rm_f 'spec/assets/iso.html'
IsoDoc::HeadlessHtmlConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('spec/assets/iso.xml', nil, false)
expect(File.exist?('spec/assets/iso.headless.html')).to be true
html = File.read('spec/assets/iso.headless.html')
expect(html).not_to match(/another empty stylesheet/)
expect(html).not_to match(%r{https://use.fontawesome.com})
expect(html).not_to match(%r{libs/jquery})
expect(html).not_to match(%r{
These results are based on a study carried out on three different types of kernel.
INPUT
html = File.read('test.html')
expect(html).to match(/another empty stylesheet/)
expect(html).to match(/font-family: Zapf/)
expect(html).to match(/an empty html cover page/)
expect(html).to match(/an empty html intro page/)
expect(html).to match(/This is > a script/)
expect(html).not_to match(/CDATA/)
expect(html).to match(%r{Enkonduko})
end
# HTML conversion with stylesheet, cover page, intro page, scripts and i18n
# YAML configured but no body font: the output is expected to use Arial and to
# include the configured page fragments.
it 'generates HTML output docs with default fonts' do
  FileUtils.rm_f %w[test.doc test.html]
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    htmlstylesheet: 'spec/assets/html.scss',
    htmlcoverpage: 'spec/assets/htmlcover.html',
    htmlintropage: 'spec/assets/htmlintro.html',
    scripts: 'spec/assets/scripts.html',
    i18nyaml: 'spec/assets/i18n.yaml',
    ulstyle: 'l1',
    olstyle: 'l2'
  )
  converter.convert('test', source, false)

  page = File.read('test.html')
  [
    /another empty stylesheet/,
    /font-family: Arial/,
    /an empty html cover page/,
    /an empty html intro page/,
    /This is > a script/
  ].each { |pattern| expect(page).to match(pattern) }
  expect(page).not_to match(/CDATA/)
  expect(page).to match(%r{Enkonduko})
end
# Word conversion with body font, stylesheets, header, cover page, intro page
# and i18n YAML all configured: every configured piece should appear in test.doc.
it 'generates Word output docs with complete configuration' do
  FileUtils.rm_f %w[test.doc test.html]
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
  INPUT
  converter = IsoDoc::WordConvert.new(
    bodyfont: 'Zapf',
    wordstylesheet: 'spec/assets/html.scss',
    standardstylesheet: 'spec/assets/std.css',
    header: 'spec/assets/header.html',
    wordcoverpage: 'spec/assets/wordcover.html',
    wordintropage: 'spec/assets/wordintro.html',
    i18nyaml: 'spec/assets/i18n.yaml',
    ulstyle: 'l1',
    olstyle: 'l2'
  )
  converter.convert('test', source, false)

  document = File.read('test.doc')
  [
    /another empty stylesheet/,
    /font-family: Zapf/,
    /a third empty stylesheet/,
    # expect(word).to match(/test<\/title>/)
    /test_files\/header.html/,
    /an empty word cover page/,
    /an empty word intro page/,
    %r{Enkonduko}
  ].each { |pattern| expect(document).to match(pattern) }
end
# Same Word configuration as the complete-configuration example but without a
# body font: the output is expected to use Arial.
it 'generates Word output docs with default fonts' do
  FileUtils.rm_f %w[test.doc test.html]
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
  INPUT
  converter = IsoDoc::WordConvert.new(
    wordstylesheet: 'spec/assets/html.scss',
    standardstylesheet: 'spec/assets/std.css',
    header: 'spec/assets/header.html',
    wordcoverpage: 'spec/assets/wordcover.html',
    wordintropage: 'spec/assets/wordintro.html',
    i18nyaml: 'spec/assets/i18n.yaml',
    ulstyle: 'l1',
    olstyle: 'l2'
  )
  converter.convert('test', source, false)

  document = File.read('test.doc')
  [
    /another empty stylesheet/,
    /font-family: Arial/,
    /a third empty stylesheet/,
    # expect(word).to match(/test<\/title>/)
    /test_files\/header.html/,
    /an empty word cover page/,
    /an empty word intro page/,
    %r{Enkonduko}
  ].each { |pattern| expect(document).to match(pattern) }
end
# Word conversion example for definition lists.
# NOTE(review): the `<<~"INPUT"` heredoc opened on the convert call has no
# INPUT terminator in this copy; only an OUTPUT terminator remains. The fixture
# document, the read of test.doc and the expectation appear to have been lost,
# so this example asserts nothing as shown. Restore them before running.
it 'converts definition lists to tables for Word' do
FileUtils.rm_f 'test.doc'
FileUtils.rm_f 'test.html'
IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)
OUTPUT
end
# Word conversion example for term entries and their reference labels.
# NOTE(review): the `<<~"INPUT"` heredoc has no INPUT terminator in this copy.
# The text lines before `OUTPUT` may mix fixture and expected content, and the
# read of test.doc plus the expectation are missing. Restore before running.
it 'populates Word template with terms reference labels' do
FileUtils.rm_f 'test.doc'
FileUtils.rm_f 'test.html'
IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)
Terms and Definitionspaddy
rice retaining its husk after threshing
3.1
The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here
OUTPUT
end
# Word conversion with `doctoclevels: 3` and a Word intro page configured; the
# fixture contains clauses nested three levels deep (4, 4.2, 4.2.1).
# NOTE(review): the regexp passed to `.sub` after reading test.doc is cut off
# (`/^.*` is never closed) and the expectation that should precede `OUTPUT` is
# missing in this copy. Restore before running.
it 'populates Word ToC with custom levels' do
FileUtils.rm_f 'test.doc'
IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss', wordintropage: 'spec/assets/wordintro.html', doctoclevels: 3).convert('test', <<~"INPUT", false)
Clause 4Introduction to this
Formerly denoted as 15 % (m/m).
Clause 4.2
A
Formerly denoted as 15 % (m/m).
Clause 4.2.1
INPUT
word = File.read('test.doc').sub(/^.*
OUTPUT
end
# With `htmltoclevels: 3`, the generated HTML must contain a `toclevel()`
# JavaScript function whose selector covers h1, h2 and h3.
it 'generates HTML output with custom ToC levels function' do
  FileUtils.rm_f %w[test.doc test.html]
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
  INPUT
  IsoDoc::HtmlConvert.new(htmltoclevels: 3).convert('test', source, false)

  page = File.read('test.html')
  # The heredoc keeps its trailing newline, so the function line must be
  # followed by a line break in the output for the substring check to pass.
  expected_function = <<~"TOCLEVEL"
    function toclevel() { return "h1:not(:empty):not(.TermNum):not(.noTOC),h2:not(:empty):not(.TermNum):not(.noTOC),h3:not(:empty):not(.TermNum):not(.noTOC)";}
  TOCLEVEL
  expect(page).to include expected_function
end
# HTML conversion of a document containing footnotes; the trimmed output is
# compared with the expected markup through `xmlpp`.
# NOTE(review): the trimming regexps and the expected markup look stripped in
# this copy (the expected heredoc is empty) — restore them before relying on this.
it 'reorders footnote numbers in HTML' do
  FileUtils.rm_f 'test.html'
  source = <<~"INPUT"
    Clause 4
    This is a footnote.
    Introduction to this
    Formerly denoted as 15 % (m/m).
    Clause 4.2
    A
    Formerly denoted as 15 % (m/m).
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    wordstylesheet: 'spec/assets/word.css',
    htmlstylesheet: 'spec/assets/html.scss',
    wordintropage: 'spec/assets/wordintro.html'
  )
  converter.convert('test', source, false)

  page = File.read('test.html')
  body = page.sub(/^.*/m, '').sub(%r{.*$}m, '')
  expected = <<~"OUTPUT"
  OUTPUT
  expect(xmlpp(body)).to be_equivalent_to xmlpp(expected)
end
# HTML conversion that should leave a .png in test_htmlimages; generated image
# file names are replaced by a placeholder before the markup is compared.
# NOTE(review): fixture, trimming regexps and expected markup look stripped in
# this copy — restore before relying on this example.
it 'moves images in HTML' do
  FileUtils.rm_f 'test.html'
  FileUtils.rm_rf 'test_htmlimages'
  source = <<~"INPUT"
    Split-it-right sample divider
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    wordstylesheet: 'spec/assets/word.css',
    htmlstylesheet: 'spec/assets/html.scss'
  )
  converter.convert('test', source, false)

  page = File.read('test.html')
  body = page.sub(/^.*/m, '').sub(%r{.*$}m, '')
  expect(`ls test_htmlimages`).to match(/\.png$/)

  normalized = body.gsub(/\/[0-9a-f-]+\.png/, '/_.png')
  expected = <<~"OUTPUT"
    Foreword
    Figure 1 — Split-it-right sample divider
  OUTPUT
  expect(xmlpp(normalized)).to be_equivalent_to xmlpp(expected)
end
# Variant of the image-moving example for an image reference without a file
# suffix; a .png is still expected in test_htmlimages.
# NOTE(review): the fixture that would distinguish this case looks stripped in
# this copy — restore before relying on this example.
it 'moves images in HTML with no file suffix' do
  FileUtils.rm_f 'test.html'
  FileUtils.rm_rf 'test_htmlimages'
  source = <<~"INPUT"
    Split-it-right sample divider
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    wordstylesheet: 'spec/assets/word.css',
    htmlstylesheet: 'spec/assets/html.scss'
  )
  converter.convert('test', source, false)

  page = File.read('test.html')
  body = page.sub(/^.*/m, '').sub(%r{.*$}m, '')
  expect(`ls test_htmlimages`).to match(/\.png$/)

  normalized = body.gsub(/\/[0-9a-f-]+\.png/, '/_.png')
  expected = <<~"OUTPUT"
    Foreword
    Figure 1 — Split-it-right sample divider
  OUTPUT
  expect(xmlpp(normalized)).to be_equivalent_to xmlpp(expected)
end
# Converts to an output path inside spec/ ('spec/test') and checks that a .png
# is placed in the image directory next to that output.
# NOTE(review): fixture, trimming regexps and expected markup look stripped in
# this copy — restore before relying on this example.
it 'moves images in HTML, using relative file location' do
  FileUtils.rm_f 'spec/test.html'
  FileUtils.rm_rf 'spec/test_htmlimages'
  IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('spec/test', <<~"INPUT", false)
    Split-it-right sample divider
  INPUT
  html = File.read('spec/test.html').sub(/^.*/m, '')
                                    .sub(%r{.*$}m, '')
  # Inspect the directory this example cleaned and wrote to. The former check
  # listed `test_htmlimages` in the working directory, which earlier examples
  # populate, so it could pass without this conversion moving any image.
  expect(`ls spec/test_htmlimages`).to match(/\.png$/)
  expect(xmlpp(html.gsub(/\/[0-9a-f-]+\.png/, '/_.png'))).to be_equivalent_to xmlpp(<<~"OUTPUT")
    Foreword
    Figure 1 — Split-it-right sample divider
  OUTPUT
end
# With `datauriimage: true`, images are expected inline as base64 PNG data
# URIs; the payload is replaced by a placeholder before the markup is compared.
# NOTE(review): fixture, trimming regexps and expected markup look stripped in
# this copy — restore before relying on this example.
it 'encodes images in HTML as data URIs' do
  FileUtils.rm_f 'test.html'
  FileUtils.rm_rf 'test_htmlimages'
  source = <<~"INPUT"
    Split-it-right sample divider
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    htmlstylesheet: 'spec/assets/html.scss',
    datauriimage: true
  )
  converter.convert('test', source, false)

  page = File.read('test.html')
  body = page.sub(/^.*/m, '').sub(%r{.*$}m, '')
  normalized = body.gsub(%r{src="data:image/png;base64,[^"]+"}, %{src="data:image/png;base64,_"})
  expected = <<~"OUTPUT"
    Foreword
    Figure 1 — Split-it-right sample divider
  OUTPUT
  expect(xmlpp(normalized)).to be_equivalent_to xmlpp(expected)
end
# Data-URI image encoding when the output path lies inside spec/ ('spec/test');
# the base64 payload is replaced by a placeholder before the markup is compared.
# NOTE(review): fixture, trimming regexps and expected markup look stripped in
# this copy — restore before relying on this example.
it 'encodes images in HTML as data URIs, using relative file location' do
  FileUtils.rm_f 'spec/test.html'
  FileUtils.rm_rf 'spec/test_htmlimages'
  source = <<~"INPUT"
    Split-it-right sample divider
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    htmlstylesheet: 'spec/assets/html.scss',
    datauriimage: true
  )
  converter.convert('spec/test', source, false)

  page = File.read('spec/test.html')
  body = page.sub(/^.*/m, '').sub(%r{.*$}m, '')
  normalized = body.gsub(%r{src="data:image/png;base64,[^"]+"}, %{src="data:image/png;base64,_"})
  expected = <<~"OUTPUT"
    Foreword
    Figure 1 — Split-it-right sample divider
  OUTPUT
  expect(xmlpp(normalized)).to be_equivalent_to xmlpp(expected)
end
# HTML conversion of a terms-and-definitions fixture; the output must contain
# the term numbers "1.1." and "1.2.", each on a line of its own.
# NOTE(review): the fixture and both regexps look like they lost their markup
# in this copy — restore before relying on this example.
it 'processes IsoXML terms for HTML' do
  FileUtils.rm_f %w[test.html test.doc]
  source = <<~"INPUT"
    Terms and Definitionspaddyrice
    rice retaining its husk after threshing
    Foreign seeds, husks, bran, sand, dust.
    A
    A
    3.1
    The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here
    paddypaddy ricerough ricecargo rice
    rice retaining its husk after threshing
    A
    The starch of waxy rice consists almost entirely of amylopectin. The kernels have a tendency to stick together after cooking.
    A
    The starch of waxy rice consists almost entirely of amylopectin. The kernels have a tendency to stick together after cooking.
    3.1
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    wordstylesheet: 'spec/assets/word.css',
    htmlstylesheet: 'spec/assets/html.scss'
  )
  converter.convert('test', source, false)

  expect(File.exist?('test.html')).to be true
  page = File.read('test.html')
  # Line breaks inside these regexp literals are part of the patterns, so the
  # continuation lines must stay flush left.
  expect(page).to match(%r{
1\.1\.
})
  expect(page).to match(%r{
1\.2\.
})
end
# A term source with an empty modification should render as
# "[SOURCE: ISO 7301:2011, Clause 3.1, modified]" in the HTML output.
# NOTE(review): the fixture looks like it lost its markup in this copy.
it 'processes empty term modifications' do
  FileUtils.rm_f %w[test.html test.doc]
  source = <<~"INPUT"
    Terms and Definitionspaddyrice
    rice retaining its husk after threshing
    Foreign seeds, husks, bran, sand, dust.
    A
    A
    3.1
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    wordstylesheet: 'spec/assets/word.css',
    htmlstylesheet: 'spec/assets/html.scss'
  )
  converter.convert('test', source, false)

  expect(File.exist?('test.html')).to be true
  page = File.read('test.html')
  expect(page).to include '[SOURCE: ISO 7301:2011, Clause 3.1, modified]'
end
# Word conversion example for list items that span several paragraphs.
# NOTE(review): the `<<~"INPUT"` heredoc opened on the convert call has no
# INPUT terminator in this copy; only an OUTPUT terminator remains. The fixture
# document, the read of test.doc and the expectation appear to have been lost,
# so this example asserts nothing as shown. Restore them before running.
it 'creates continuation styles for multiparagraph list items in Word' do
FileUtils.rm_f 'test.doc'
FileUtils.rm_f 'test.html'
IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)
OUTPUT
end
# HTML conversion of a fixture containing "<xml> &"; the trimmed output is
# compared with the expected markup through `xmlpp`.
# NOTE(review): fixture, trimming regexps and expected markup look stripped in
# this copy — restore before relying on this example.
it 'does not lose HTML escapes in postprocessing' do
  FileUtils.rm_f %w[test.doc test.html]
  source = <<~"INPUT"
    XML code
    <xml> &
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    wordstylesheet: 'spec/assets/word.css',
    htmlstylesheet: 'spec/assets/html.scss'
  )
  converter.convert('test', source, false)

  page = File.read('test.html')
  body = page.sub(/^.*/m, '').sub(%r{.*$}m, '')
  expected = <<~"OUTPUT"
    Foreword
    <xml> &
    Figure 1 — XML code
  OUTPUT
  expect(xmlpp(body)).to be_equivalent_to xmlpp(expected)
end
# Word conversion of a fixture containing "<xml> &".
# NOTE(review): the regexp passed to `.sub` after reading test.doc is cut off
# (`/^.*` is never closed) and the expectation that should precede `OUTPUT` is
# missing in this copy. Restore before running.
it 'does not lose HTML escapes in postprocessing (Word)' do
FileUtils.rm_f 'test.doc'
FileUtils.rm_f 'test.html'
IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)
XML code
<xml> &
INPUT
word = File.read('test.doc').sub(/^.*
OUTPUT
end
# Word conversion example for example-style propagation to paragraphs.
# NOTE(review): the `<<~"INPUT"` heredoc opened on the convert call has no
# INPUT terminator in this copy; only an OUTPUT terminator remains. The fixture
# document, the read of test.doc and the expectation appear to have been lost,
# so this example asserts nothing as shown. Restore them before running.
it 'propagates example style to paragraphs in postprocessing (Word)' do
FileUtils.rm_f 'test.doc'
FileUtils.rm_f 'test.html'
IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)
OUTPUT
end
# Word conversion of a fixture with a captioned image; the remaining expected
# text shows the caption rendered as "Figure 1 — ...".
# NOTE(review): the regexp passed to `.sub` after reading test.doc is cut off
# (`/^.*` is never closed) and the expectation call is missing in this copy.
# Restore before running.
it 'deals with image captions (Word)' do
FileUtils.rm_f 'test.doc'
FileUtils.rm_f 'test.html'
IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)
Typical arrangement of the far-field scan set-up
INPUT
word = File.read('test.doc').sub(/^.*
Figure 1 — Typical arrangement of the far-field scan set-up
OUTPUT
end
# Word conversion example for tables whose title is empty.
# NOTE(review): the `<<~"INPUT"` heredoc opened on the convert call has no
# INPUT terminator in this copy; only an OUTPUT terminator remains. The fixture
# document, the read of test.doc and the expectation appear to have been lost,
# so this example asserts nothing as shown. Restore them before running.
it 'deals with empty table titles (Word)' do
FileUtils.rm_f 'test.doc'
FileUtils.rm_f 'test.html'
IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)
OUTPUT
end
# Word conversion example for table-cell alignment.
# NOTE(review): the `<<~"INPUT"` heredoc opened on the convert call has no
# INPUT terminator in this copy; only an OUTPUT terminator remains. The fixture
# document, the read of test.doc and the expectation appear to have been lost,
# so this example asserts nothing as shown. Restore them before running.
it 'propagates alignment of table cells (Word)' do
FileUtils.rm_f 'test.doc'
FileUtils.rm_f 'test.html'
IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss').convert('test', <<~"INPUT", false)
OUTPUT
end
# Calls `html_preface` directly on a Nokogiri document built from the INPUT
# heredoc and compares the serialized result with the OUTPUT heredoc via `xmlpp`.
# In the remaining text the four boilerplate sections appear in the order
# Copyright, License, Legal, Feedback in INPUT and reversed in OUTPUT.
# NOTE(review): the argument to `.sub` contains an unbalanced `}` and looks like
# two substitutions fused together after their markup was stripped; both
# heredocs have also lost their markup. Restore before running.
it 'cleans up boilerplate' do
expect(xmlpp(IsoDoc::HtmlConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss', filename: 'test').html_preface(Nokogiri::XML(<<~INPUT)).to_xml).sub(/^.*.*$}m, '')).to be_equivalent_to xmlpp(<<~"OUTPUT")
Copyright
License
Legal
Feedback
INPUT
Feedback
Legal
License
Copyright
OUTPUT
end
# Calls `word_cleanup` directly on a Nokogiri document built from the INPUT
# heredoc and compares the serialized result with the OUTPUT heredoc via `xmlpp`.
# In the remaining text the four boilerplate sections appear in the order
# Copyright, License, Legal, Feedback in INPUT and reversed in OUTPUT.
# NOTE(review): the argument to `.sub` contains an unbalanced `}` and looks like
# two substitutions fused together after their markup was stripped; both
# heredocs have also lost their markup. Restore before running.
it 'cleans up boilerplate (Word)' do
expect(xmlpp(IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss', filename: 'test').word_cleanup(Nokogiri::XML(<<~INPUT)).to_xml).sub(/^.*.*$}m, '')).to be_equivalent_to xmlpp(<<~"OUTPUT")
Copyright
License
Legal
Feedback
INPUT
Feedback
Legal
License
Copyright
OUTPUT
end
# Word conversion of a document with page breaks. It expects test.doc to define
# per-section page rules (WordSection2_0/2_1 and WordSection3_0/3_1/3_2, with
# P and L page variants) and then compares the trimmed body with OUTPUT.
# NOTE(review): the P/L suffixes presumably mean portrait/landscape, going by
# the example title — confirm against the converter.
# NOTE(review): the argument to `html.sub` contains an unbalanced `}` and looks
# like two substitutions fused together after their markup was stripped; the
# INPUT and OUTPUT heredocs have also lost their markup. Restore before running.
it 'deals with landscape and portrait pagebreaks (Word)' do
FileUtils.rm_f 'test.doc'
IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss', filename: 'test').convert('test', <<~"INPUT", false)
Document title1.2enpublishedarticlePreface 1
This is a paragraph
A
B
Preface 1.1
On my side
Upright again
Preface 1.3
And still upright
Foreword
For further information on the Foreword, see ISO/IEC Directives, Part 2, 2016, Clause 12.
A
B
C
D
B
And up
Annex 1
INPUT
expect(File.exist?('test.doc')).to be true
html = File.read('test.doc', encoding: 'UTF-8')
expect(html).to include 'div.WordSection2_0 {page:WordSection2P;}'
expect(html).to include 'div.WordSection2_1 {page:WordSection2L;}'
expect(html).to include 'div.WordSection3_0 {page:WordSection3P;}'
expect(html).to include 'div.WordSection3_1 {page:WordSection3P;}'
expect(html).to include 'div.WordSection3_2 {page:WordSection3L;}'
expect(xmlpp(html.sub(/^.*.*$}m, ''))).to be_equivalent_to xmlpp(<<~"OUTPUT")
Introduction
This is a
paragraph
A
B
Preface 1.1
On my side
Upright again
Preface 1.3
And still upright
Document title
Foreword
NOTE 1
For further information on the Foreword, see
ISO/IEC Directives, Part 2, 2016, Clause 12.
Table 1
A
B
C
D
NOTE
B
And up
Annex A
(normative)
Annex 1
OUTPUT
end
it 'expands out nested tables in Word' do
expect(xmlpp(IsoDoc::WordConvert.new(wordstylesheet: 'spec/assets/word.css', htmlstylesheet: 'spec/assets/html.scss', filename: 'test').word_cleanup(Nokogiri::XML(<<~INPUT)).to_xml).sub(/^.*.*$}m, '')).to be_equivalent_to xmlpp(<<~"OUTPUT")