require "spec_helper"
require "fileutils"
RSpec.describe IsoDoc do
# Converts an inline source string to HTML with an explicit :filename option
# and checks test.html for: a line break followed by "test", the text
# "another empty stylesheet", a cdnjs MathJax URL and the delimiters setting.
it "generates file based on string input" do
  FileUtils.rm_f %w[test.doc test.html]
  options = {
    wordstylesheet: "spec/assets/word.css",
    htmlstylesheet: "spec/assets/html.css",
    filename: "test",
  }
  source = <<~"INPUT"
    test
    These results are based on a study carried out on three different types of kernel.
  INPUT
  IsoDoc::HtmlConvert.new(options).convert("test", source, false)

  expect(File.exist?("test.html")).to be true
  html = File.read("test.html")
  # The next pattern spans two source lines, so it starts with a literal newline;
  # its continuation line must stay unindented.
  expect(html).to match(%r{
test})
  expect(html).to match(/another empty stylesheet/)
  expect(html).to match(%r{cdnjs\.cloudflare\.com/ajax/libs/mathjax/})
  expect(html).to match(/delimiters: \[\['\(#\(', '\)#\)'\]\]/)
end
# Feeds the HTML converter a body that contains a Liquid-style tag
# ("{% elif %}") and checks that test.html is written.
it "ignores Liquid markup in the document body" do
  FileUtils.rm_f "test.doc"
  FileUtils.rm_f "test.html"
  IsoDoc::HtmlConvert.new({ wordstylesheet: "spec/assets/word.css" }).convert("test", <<~"INPUT", false)
    test
    {% elif %}These results are based on a study carried out on three different types of kernel.
  INPUT
  # The example only requires that conversion completes and writes the file;
  # no expectation is placed on the file's content.
  expect(File.exist?("test.html")).to be true
end
# Feeds the Word converter a body that contains a Liquid-style tag
# ("{% elif %}") and checks that test.doc is written.
it "ignores Liquid markup in the document body (Word)" do
  FileUtils.rm_f "test.doc"
  FileUtils.rm_f "test.html"
  IsoDoc::WordConvert.new({ wordstylesheet: "spec/assets/word.css" }).convert("test", <<~"INPUT", false)
    test
    {% elif %}These results are based on a study carried out on three different types of kernel.
  INPUT
  # The example only requires that conversion completes and writes the file;
  # no expectation is placed on the file's content.
  expect(File.exist?("test.doc")).to be true
end
# With only a Word stylesheet configured, test.html must not contain "test"
# or "another empty stylesheet", but must still contain a cdnjs MathJax URL
# and the delimiters setting.
it "generates HTML output docs with null configuration" do
  FileUtils.rm_f %w[test.doc test.html]
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
  INPUT
  converter = IsoDoc::HtmlConvert.new({ wordstylesheet: "spec/assets/word.css" })
  converter.convert("test", source, false)

  expect(File.exist?("test.html")).to be true
  html = File.read("test.html")
  expect(html).not_to match(%r{test})
  expect(html).not_to match(/another empty stylesheet/)
  expect(html).to match(%r{cdnjs\.cloudflare\.com/ajax/libs/mathjax/})
  expect(html).to match(/delimiters: \[\['\(#\(', '\)#\)'\]\]/)
end
# Converts a one-line source to Word with both stylesheets configured and
# checks test.doc for "one empty stylesheet" and a "div.table_container" rule.
it "generates Word output docs with null configuration" do
  FileUtils.rm_f %w[test.doc test.html]
  stylesheets = {
    wordstylesheet: "spec/assets/word.css",
    htmlstylesheet: "spec/assets/html.css",
  }
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
  INPUT
  IsoDoc::WordConvert.new(stylesheets).convert("test", source, false)

  expect(File.exist?("test.doc")).to be true
  word = File.read("test.doc")
  expect(word).to match(/one empty stylesheet/)
  expect(word).to match(/div\.table_container/)
end
# Converts spec/assets/iso.xml (no inline source is passed) to HTML and checks
# spec/assets/iso.html for "another empty stylesheet", the Font Awesome URL
# and a "libs/jquery" path.
it "generates HTML output docs with null configuration from file" do
  FileUtils.rm_f "spec/assets/iso.doc"
  FileUtils.rm_f "spec/assets/iso.html"
  stylesheets = {
    wordstylesheet: "spec/assets/word.css",
    htmlstylesheet: "spec/assets/html.css",
  }
  IsoDoc::HtmlConvert.new(stylesheets).convert("spec/assets/iso.xml", nil, false)

  expect(File.exist?("spec/assets/iso.html")).to be true
  html = File.read("spec/assets/iso.html")
  expect(html).to match(/another empty stylesheet/)
  # Dots are escaped so the host name is matched literally rather than as
  # "any character".
  expect(html).to match(%r{https://use\.fontawesome\.com})
  expect(html).to match(%r{libs/jquery})
end
# Runs HeadlessHtmlConvert on spec/assets/iso.xml, expects
# spec/assets/iso.headless.html to exist, and expects it NOT to contain
# "another empty stylesheet", the Font Awesome URL or "libs/jquery".
# NOTE(review): the `%r{` opened in the last `not_to match` is never closed in
# this view, and the lines after it (an INPUT terminator, assertions on
# test.html) look like the tail of a different example — confirm against the
# original file before relying on this block.
it "generates Headless HTML output docs with null configuration from file" do
FileUtils.rm_f "spec/assets/iso.html"
IsoDoc::HeadlessHtmlConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("spec/assets/iso.xml", nil, false)
expect(File.exist?("spec/assets/iso.headless.html")).to be true
html = File.read("spec/assets/iso.headless.html")
expect(html).not_to match(/another empty stylesheet/)
expect(html).not_to match(%r{https://use.fontawesome.com})
expect(html).not_to match(%r{libs/jquery})
expect(html).not_to match(%r{
These results are based on a study carried out on three different types of kernel.
INPUT
html = File.read("test.html")
expect(html).to match(/another empty stylesheet/)
expect(html).to match(/font-family: Zapf/)
expect(html).to match(/an empty html cover page/)
expect(html).to match(/an empty html intro page/)
expect(html).to match(/This is > a script/)
expect(html).not_to match(/CDATA/)
expect(html).to match(%r{Enkonduko})
end
# Converts with stylesheet, cover page, intro page, scripts and i18n YAML
# options but no font option, then checks test.html for the stylesheet text,
# "font-family: Arial", the cover/intro page texts, the script text, the
# absence of "CDATA", and the string "Enkonduko".
it "generates HTML output docs with default fonts" do
  FileUtils.rm_f %w[test.doc test.html]
  options = {
    htmlstylesheet: "spec/assets/html.css",
    htmlcoverpage: "spec/assets/htmlcover.html",
    htmlintropage: "spec/assets/htmlintro.html",
    scripts: "spec/assets/scripts.html",
    i18nyaml: "spec/assets/i18n.yaml",
    ulstyle: "l1",
    olstyle: "l2",
  }
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
  INPUT
  IsoDoc::HtmlConvert.new(options).convert("test", source, false)

  html = File.read("test.html")
  expect(html).to match(/another empty stylesheet/)
  expect(html).to match(/font-family: Arial/)
  expect(html).to match(/an empty html cover page/)
  expect(html).to match(/an empty html intro page/)
  expect(html).to match(/This is > a script/)
  expect(html).not_to match(/CDATA/)
  expect(html).to match(%r{Enkonduko})
end
# Converts to Word with body font "Zapf" plus stylesheet, header, cover page,
# intro page and i18n YAML options, then checks test.doc for the matching
# marker texts, the header path under test_files and the string "Enkonduko".
it "generates Word output docs with complete configuration" do
  FileUtils.rm_f %w[test.doc test.html]
  options = {
    bodyfont: "Zapf",
    wordstylesheet: "spec/assets/html.css",
    standardstylesheet: "spec/assets/std.css",
    header: "spec/assets/header.html",
    wordcoverpage: "spec/assets/wordcover.html",
    wordintropage: "spec/assets/wordintro.html",
    i18nyaml: "spec/assets/i18n.yaml",
    ulstyle: "l1",
    olstyle: "l2",
  }
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
  INPUT
  IsoDoc::WordConvert.new(options).convert("test", source, false)

  word = File.read("test.doc")
  expect(word).to match(/another empty stylesheet/)
  expect(word).to match(/font-family: Zapf/)
  expect(word).to match(/a third empty stylesheet/)
  # Expectation left disabled, as in the original:
  #expect(word).to match(/test<\/title>/)
  expect(word).to match(/test_files\/header.html/)
  expect(word).to match(/an empty word cover page/)
  expect(word).to match(/an empty word intro page/)
  expect(word).to match(%r{Enkonduko})
end
# Same Word configuration as the "complete configuration" case but without a
# font option; test.doc is expected to contain "font-family: Arial" alongside
# the stylesheet, header, cover page, intro page and "Enkonduko" markers.
it "generates Word output docs with default fonts" do
  FileUtils.rm_f %w[test.doc test.html]
  options = {
    wordstylesheet: "spec/assets/html.css",
    standardstylesheet: "spec/assets/std.css",
    header: "spec/assets/header.html",
    wordcoverpage: "spec/assets/wordcover.html",
    wordintropage: "spec/assets/wordintro.html",
    i18nyaml: "spec/assets/i18n.yaml",
    ulstyle: "l1",
    olstyle: "l2",
  }
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
  INPUT
  IsoDoc::WordConvert.new(options).convert("test", source, false)

  word = File.read("test.doc")
  expect(word).to match(/another empty stylesheet/)
  expect(word).to match(/font-family: Arial/)
  expect(word).to match(/a third empty stylesheet/)
  # Expectation left disabled, as in the original:
  #expect(word).to match(/test<\/title>/)
  expect(word).to match(/test_files\/header.html/)
  expect(word).to match(/an empty word cover page/)
  expect(word).to match(/an empty word intro page/)
  expect(word).to match(%r{Enkonduko})
end
# Clears test.doc/test.html and runs the Word converter on an inline source.
# NOTE(review): the INPUT heredoc opened on the convert line has no body or
# terminator in this view, and only a stray OUTPUT terminator remains — the
# source document and expectations appear to be missing; confirm against the
# original file.
it "converts definition lists to tables for Word" do
FileUtils.rm_f "test.doc"
FileUtils.rm_f "test.html"
IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("test", <<~"INPUT", false)
OUTPUT
end
# Clears test.doc/test.html and runs the Word converter on an inline source
# whose visible text is a terms-and-definitions fragment.
# NOTE(review): the INPUT heredoc is never terminated in this view (an OUTPUT
# terminator follows instead), so the expectations appear to be missing;
# confirm against the original file.
it "populates Word template with terms reference labels" do
FileUtils.rm_f "test.doc"
FileUtils.rm_f "test.html"
IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("test", <<~"INPUT", false)
Terms and Definitionspaddy
rice retaining its husk after threshing
3.1
The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here
OUTPUT
end
# Runs the Word converter with a Word intro page and `doctoclevels: 3`, then
# reads test.doc back for comparison.
# NOTE(review): the `.sub(/^.*` pattern applied to the file content is cut off
# in this view and the expectation itself is missing (only the OUTPUT
# terminator remains); confirm against the original file.
it "populates Word ToC with custom levels" do
FileUtils.rm_f "test.doc"
IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", wordintropage: "spec/assets/wordintro.html", doctoclevels: 3}).convert("test", <<~"INPUT", false)
Clause 4Introduction to this
Formerly denoted as 15 % (m/m).
Clause 4.2
A
Formerly denoted as 15 % (m/m).
Clause 4.2.1
INPUT
word = File.read("test.doc").sub(/^.*
OUTPUT
end
# Converts with `htmltoclevels: 3` and checks that test.html includes a
# `toclevel()` JavaScript function whose selector covers h1, h2 and h3.
it "generates HTML output with custom ToC levels function" do
  FileUtils.rm_f %w[test.doc test.html]
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
  INPUT
  IsoDoc::HtmlConvert.new({ htmltoclevels: 3 }).convert("test", source, false)

  generated = File.read("test.html")
  expected_function = <<~"TOCLEVEL"
    function toclevel() { return "h1:not(:empty):not(.TermNum):not(.noTOC),h2:not(:empty):not(.TermNum):not(.noTOC),h3:not(:empty):not(.TermNum):not(.noTOC)";}
  TOCLEVEL
  expect(generated).to include expected_function
end
# Converts a clause fragment to HTML, trims test.html with two `sub` calls and
# compares the pretty-printed remainder with the expected markup.
# NOTE(review): both trimming patterns and the expected markup read as empty
# or catch-all in this view; they are reproduced exactly as found.
it "reorders footnote numbers in HTML" do
  FileUtils.rm_f "test.html"
  options = {
    wordstylesheet: "spec/assets/word.css",
    htmlstylesheet: "spec/assets/html.css",
    wordintropage: "spec/assets/wordintro.html",
  }
  source = <<~"INPUT"
    Clause 4
    This is a footnote.
    Introduction to this
    Formerly denoted as 15 % (m/m).
    Clause 4.2
    A
    Formerly denoted as 15 % (m/m).
  INPUT
  IsoDoc::HtmlConvert.new(options).convert("test", source, false)

  raw = File.read("test.html")
  html = raw.sub(/^.*/m, '').sub(%r{.*$}m, "")
  expected = <<~"OUTPUT"
  OUTPUT
  expect(xmlpp(html)).to be_equivalent_to xmlpp(expected)
end
# Converts to HTML, expects at least one .png file in test_htmlimages, and
# compares the trimmed output (image file names normalised to "/_.png") with
# the expected markup.
# NOTE(review): the INPUT body and both trimming patterns read as empty or
# catch-all in this view; they are reproduced exactly as found.
it "moves images in HTML" do
  FileUtils.rm_f "test.html"
  FileUtils.rm_rf "test_htmlimages"
  IsoDoc::HtmlConvert.new({ wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css" }).convert("test", <<~"INPUT", false)
  INPUT
  html = File.read("test.html").sub(/^.*/m, '').
    sub(%r{.*$}m, "")
  # Dir.glob lists the directory in-process, so the check does not depend on
  # an external `ls` command being available.
  expect(Dir.glob("test_htmlimages/*.png")).not_to be_empty
  expect(xmlpp(html.gsub(/\/[0-9a-f-]+\.png/, "/_.png"))).to be_equivalent_to xmlpp(<<~"OUTPUT")
    Foreword
    Figure 1 — Split-it-right sample divider
  OUTPUT
end
# Converts to HTML, expects at least one .png file in test_htmlimages, and
# compares the trimmed output (image file names normalised to "/_.png") with
# the expected markup.
# NOTE(review): the INPUT body and both trimming patterns read as empty or
# catch-all in this view; they are reproduced exactly as found.
it "moves images in HTML with no file suffix" do
  FileUtils.rm_f "test.html"
  FileUtils.rm_rf "test_htmlimages"
  IsoDoc::HtmlConvert.new({ wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css" }).convert("test", <<~"INPUT", false)
  INPUT
  html = File.read("test.html").sub(/^.*/m, '').
    sub(%r{.*$}m, "")
  # Dir.glob lists the directory in-process, so the check does not depend on
  # an external `ls` command being available.
  expect(Dir.glob("test_htmlimages/*.png")).not_to be_empty
  expect(xmlpp(html.gsub(/\/[0-9a-f-]+\.png/, "/_.png"))).to be_equivalent_to xmlpp(<<~"OUTPUT")
    Foreword
    Figure 1 — Split-it-right sample divider
  OUTPUT
end
# Converts to spec/test.html, expects at least one .png file in the image
# directory beside it, and compares the trimmed output (image file names
# normalised to "/_.png") with the expected markup.
# NOTE(review): the INPUT body and both trimming patterns read as empty or
# catch-all in this view; they are reproduced exactly as found.
it "moves images in HTML, using relative file location" do
  FileUtils.rm_f "spec/test.html"
  FileUtils.rm_rf "spec/test_htmlimages"
  IsoDoc::HtmlConvert.new({ wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css" }).convert("spec/test", <<~"INPUT", false)
  INPUT
  html = File.read("spec/test.html").sub(/^.*/m, '').
    sub(%r{.*$}m, "")
  # Check the directory this example cleared above (spec/test_htmlimages).
  # A top-level test_htmlimages directory may be left over from other
  # examples and would let the check pass regardless of this conversion.
  expect(Dir.glob("spec/test_htmlimages/*.png")).not_to be_empty
  expect(xmlpp(html.gsub(/\/[0-9a-f-]+\.png/, "/_.png"))).to be_equivalent_to xmlpp(<<~"OUTPUT")
    Foreword
    Figure 1 — Split-it-right sample divider
  OUTPUT
end
# Converts with `datauriimage: true`, replaces every base64 PNG payload in the
# trimmed output with "_" and compares the result with the expected markup.
# NOTE(review): the INPUT body and both trimming patterns read as empty or
# catch-all in this view; they are reproduced exactly as found.
it "encodes images in HTML as data URIs" do
  FileUtils.rm_f "test.html"
  FileUtils.rm_rf "test_htmlimages"
  source = <<~"INPUT"
  INPUT
  options = { htmlstylesheet: "spec/assets/html.css", datauriimage: true }
  IsoDoc::HtmlConvert.new(options).convert("test", source, false)

  raw = File.read("test.html")
  html = raw.sub(/^.*/m, '').sub(%r{.*$}m, "")
  normalised = html.gsub(%r{src="data:image/png;base64,[^"]+"}, %{src="data:image/png;base64,_"})
  expected = <<~"OUTPUT"
    Foreword
    Figure 1 — Split-it-right sample divider
  OUTPUT
  expect(xmlpp(normalised)).to be_equivalent_to xmlpp(expected)
end
# Same data-URI check as the previous example, but the output is written to
# spec/test.html.
# NOTE(review): the INPUT body and both trimming patterns read as empty or
# catch-all in this view; they are reproduced exactly as found.
it "encodes images in HTML as data URIs, using relative file location" do
  FileUtils.rm_f "spec/test.html"
  FileUtils.rm_rf "spec/test_htmlimages"
  source = <<~"INPUT"
  INPUT
  options = { htmlstylesheet: "spec/assets/html.css", datauriimage: true }
  IsoDoc::HtmlConvert.new(options).convert("spec/test", source, false)

  raw = File.read("spec/test.html")
  html = raw.sub(/^.*/m, '').sub(%r{.*$}m, "")
  normalised = html.gsub(%r{src="data:image/png;base64,[^"]+"}, %{src="data:image/png;base64,_"})
  expected = <<~"OUTPUT"
    Foreword
    Figure 1 — Split-it-right sample divider
  OUTPUT
  expect(xmlpp(normalised)).to be_equivalent_to xmlpp(expected)
end
# Converts a terms-and-definitions fragment to HTML and checks that test.html
# contains "1.1." and "1.2.", each preceded and followed by a line break.
it "processes IsoXML terms for HTML" do
  FileUtils.rm_f %w[test.html test.doc]
  stylesheets = {
    wordstylesheet: "spec/assets/word.css",
    htmlstylesheet: "spec/assets/html.css",
  }
  source = <<~"INPUT"
    Terms and Definitionspaddyrice
    rice retaining its husk after threshing
    Foreign seeds, husks, bran, sand, dust.
    A
    A
    3.1
    The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here
    paddypaddy ricerough ricecargo rice
    rice retaining its husk after threshing
    A
    The starch of waxy rice consists almost entirely of amylopectin. The kernels have a tendency to stick together after cooking.
    A
    The starch of waxy rice consists almost entirely of amylopectin. The kernels have a tendency to stick together after cooking.
    3.1
  INPUT
  IsoDoc::HtmlConvert.new(stylesheets).convert("test", source, false)

  expect(File.exist?("test.html")).to be true
  html = File.read("test.html")
  # Both patterns span several source lines, so their line breaks are part of
  # the regex; the continuation lines must stay unindented.
  expect(html).to match(%r{
1\.1\.
})
  expect(html).to match(%r{
1\.2\.
})
end
# Converts a short terms fragment to HTML and checks that test.html includes
# the source citation "[SOURCE: ISO 7301:2011, Clause 3.1, modified]".
it "processes empty term modifications" do
  FileUtils.rm_f %w[test.html test.doc]
  stylesheets = {
    wordstylesheet: "spec/assets/word.css",
    htmlstylesheet: "spec/assets/html.css",
  }
  source = <<~"INPUT"
    Terms and Definitionspaddyrice
    rice retaining its husk after threshing
    Foreign seeds, husks, bran, sand, dust.
    A
    A
    3.1
  INPUT
  IsoDoc::HtmlConvert.new(stylesheets).convert("test", source, false)

  expect(File.exist?("test.html")).to be true
  generated = File.read("test.html")
  expect(generated).to include '[SOURCE: ISO 7301:2011, Clause 3.1, modified]'
end
# Clears test.doc/test.html and runs the Word converter on an inline source.
# NOTE(review): the INPUT heredoc opened on the convert line has no body or
# terminator in this view, and only a stray OUTPUT terminator remains — the
# source document and expectations appear to be missing; confirm against the
# original file.
it "creates continuation styles for multiparagraph list items in Word" do
FileUtils.rm_f "test.doc"
FileUtils.rm_f "test.html"
IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("test", <<~"INPUT", false)
OUTPUT
end
# Converts a fragment containing "<xml> &" to HTML, trims test.html with two
# `sub` calls and compares the pretty-printed remainder with the expected
# markup.
# NOTE(review): both trimming patterns read as catch-all in this view; they
# are reproduced exactly as found.
it "does not lose HTML escapes in postprocessing" do
  FileUtils.rm_f %w[test.doc test.html]
  stylesheets = {
    wordstylesheet: "spec/assets/word.css",
    htmlstylesheet: "spec/assets/html.css",
  }
  source = <<~"INPUT"
    XML code
    <xml> &
  INPUT
  IsoDoc::HtmlConvert.new(stylesheets).convert("test", source, false)

  raw = File.read("test.html")
  html = raw.sub(/^.*/m, '').sub(%r{.*$}m, "")
  expected = <<~"OUTPUT"
    Foreword
    <xml> &
    Figure 1 — XML code
  OUTPUT
  expect(xmlpp(html)).to be_equivalent_to xmlpp(expected)
end
# Runs the Word converter on a fragment containing "<xml> &" and reads
# test.doc back for comparison.
# NOTE(review): the `.sub(/^.*` pattern applied to the file content is cut off
# in this view and the expectation itself is missing (only the OUTPUT
# terminator remains); confirm against the original file.
it "does not lose HTML escapes in postprocessing (Word)" do
FileUtils.rm_f "test.doc"
FileUtils.rm_f "test.html"
IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("test", <<~"INPUT", false)
XML code
<xml> &
INPUT
word = File.read("test.doc").sub(/^.*
OUTPUT
end
# Clears test.doc/test.html and runs the Word converter on an inline source.
# NOTE(review): the INPUT heredoc opened on the convert line has no body or
# terminator in this view, and only a stray OUTPUT terminator remains — the
# source document and expectations appear to be missing; confirm against the
# original file.
it "propagates example style to paragraphs in postprocessing (Word)" do
FileUtils.rm_f "test.doc"
FileUtils.rm_f "test.html"
IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("test", <<~"INPUT", false)
OUTPUT
end
# Runs the Word converter on an inline source and reads test.doc back for
# comparison; the visible expected text is a figure caption.
# NOTE(review): the INPUT body is empty and the `.sub(/^.*` pattern is cut off
# in this view, so the expectation is incomplete; confirm against the original
# file.
it "deals with image captions (Word)" do
FileUtils.rm_f "test.doc"
FileUtils.rm_f "test.html"
IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("test", <<~"INPUT", false)
INPUT
word = File.read("test.doc").sub(/^.*
Figure 1 — Typical arrangement of the far-field scan set-up
OUTPUT
end
# Clears test.doc/test.html and runs the Word converter on an inline source.
# NOTE(review): the INPUT heredoc opened on the convert line has no body or
# terminator in this view, and only a stray OUTPUT terminator remains — the
# source document and expectations appear to be missing; confirm against the
# original file.
it "deals with empty table titles (Word)" do
FileUtils.rm_f "test.doc"
FileUtils.rm_f "test.html"
IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("test", <<~"INPUT", false)
OUTPUT
end
# Clears test.doc/test.html and runs the Word converter on an inline source.
# NOTE(review): the INPUT heredoc opened on the convert line has no body or
# terminator in this view, and only a stray OUTPUT terminator remains — the
# source document and expectations appear to be missing; confirm against the
# original file.
it "propagates alignment of table cells (Word)" do
FileUtils.rm_f "test.doc"
FileUtils.rm_f "test.html"
IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("test", <<~"INPUT", false)
OUTPUT
end
# Calls HtmlConvert#html_preface on a Nokogiri document parsed from the INPUT
# heredoc and compares the pretty-printed result with the OUTPUT heredoc.
# As the heredoc bodies read here, INPUT lists Copyright, License, Legal,
# Feedback and OUTPUT lists the same headings in the opposite order.
# NOTE(review): the regex passed to `.sub` on the first line looks damaged in
# this view (no closing delimiter before `}m`); confirm against the original
# file.
it "cleans up boilerplate" do
expect(xmlpp(IsoDoc::HtmlConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", filename: "test"}).html_preface(Nokogiri::XML(<<~INPUT)).to_xml).sub(/^.*.*$}m, "")).to be_equivalent_to xmlpp(<<~"OUTPUT")
Copyright
License
Legal
Feedback
INPUT
Feedback
Legal
License
Copyright
OUTPUT
end
# Calls WordConvert#word_cleanup on a Nokogiri document parsed from the INPUT
# heredoc and compares the pretty-printed result with the OUTPUT heredoc.
# As the heredoc bodies read here, INPUT lists Copyright, License, Legal,
# Feedback and OUTPUT lists the same headings in the opposite order.
# NOTE(review): the regex passed to `.sub` on the first line looks damaged in
# this view (no closing delimiter before `}m`); confirm against the original
# file.
it "cleans up boilerplate (Word)" do
expect(xmlpp(IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", filename: "test"}).word_cleanup(Nokogiri::XML(<<~INPUT)).to_xml).sub(/^.*.*$}m, "")).to be_equivalent_to xmlpp(<<~"OUTPUT")
Copyright
License
Legal
Feedback
INPUT
Feedback
Legal
License
Copyright
OUTPUT
end
# Runs the Word converter on a document with preface and foreword content,
# expects test.doc to exist and to include four `div.WordSection…` page rules
# (WordSection2_0/2_1 and WordSection3_0/3_1, each mapped to a P or L page),
# then compares the trimmed document with the OUTPUT heredoc.
# NOTE(review): the regex passed to `html.sub` in the final expectation looks
# damaged in this view (no closing delimiter before `}m`); confirm against the
# original file.
it "deals with landscape and portrait pagebreaks (Word)" do
FileUtils.rm_f "test.doc"
IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", filename: "test"}).convert("test", <<~"INPUT", false)
Document title1.2enpublishedarticlePreface 1
This is a paragraph
A
B
Preface 1.1
On my side
Upright again
Preface 1.3
And still upright
Foreword
For further information on the Foreword, see ISO/IEC Directives, Part 2, 2016, Clause 12.
A
B
C
D
B
And up
INPUT
expect(File.exist?("test.doc")).to be true
html = File.read("test.doc", encoding: "UTF-8")
expect(html).to include "div.WordSection2_0 {page:WordSection2P;}"
expect(html).to include "div.WordSection2_1 {page:WordSection2L;}"
expect(html).to include "div.WordSection3_0 {page:WordSection3P;}"
expect(html).to include "div.WordSection3_1 {page:WordSection3L;}"
expect(xmlpp(html.sub(/^.*.*$}m, ""))).to be_equivalent_to xmlpp(<<~"OUTPUT")
Introduction
This is a
paragraph
A
B
Preface 1.1
On my side
Upright again
Preface 1.3
And still upright
Document title
Foreword
NOTE 1
For further information on the Foreword, see
ISO/IEC Directives, Part 2, 2016, Clause 12.
Table 1
A
B
C
D
NOTE
B
And up
OUTPUT
end
it "expands out nested tables in Word" do
expect(xmlpp(IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", filename: "test"}).word_cleanup(Nokogiri::XML(<<~INPUT)).to_xml).sub(/^.*.*$}m, "")).to be_equivalent_to xmlpp(<<~"OUTPUT")