# frozen_string_literal: true
require "spec_helper"
require "fileutils"
# Stylesheet settings shared by the examples below that hand `options`
# directly to a converter constructor.
options = {
  wordstylesheet: "spec/assets/word.css",
  htmlstylesheet: "spec/assets/html.scss",
}
RSpec.describe IsoDoc do
# Converts an inline document string with explicit stylesheets and a
# :filename option, then inspects the HTML written to test.html.
it "generates file based on string input" do
  FileUtils.rm_f(%w[test.doc test.html])
  source = <<~"INPUT"
    test
    These results are based on a study carried out on three different types of kernel.
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    { wordstylesheet: "spec/assets/word.css",
      htmlstylesheet: "spec/assets/html.scss",
      filename: "test" },
  )
  converter.convert("test", source, false)

  expect(File.exist?("test.html")).to be true
  page = File.read("test.html")
  # The continuation line of this pattern stays flush-left: leading
  # whitespace would become part of the (non-extended) regex.
  expect(page).to match(%r{
test})
  expect(page).to match(/another empty stylesheet/)
  expect(page).to match(%r{cdnjs\.cloudflare\.com/ajax/libs/mathjax/})
  expect(page).to match(/delimiters: \[\['\(#\(', '\)#\)'\]\]/)
end
# Converts an inline document to spec/assets/test, naming the stylesheets
# without a directory, and inspects the HTML written there.
it "generates file in a remote directory" do
  FileUtils.rm_f(%w[spec/assets/test.doc spec/assets/test.html])
  source = <<~"INPUT"
    test
    These results are based on a study carried out on three different types of kernel.
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    { wordstylesheet: "word.css",
      htmlstylesheet: "html.scss",
      filename: "test" },
  )
  converter.convert("spec/assets/test", source, false)

  expect(File.exist?("spec/assets/test.html")).to be true
  page = File.read("spec/assets/test.html")
  expect(page).to match(%r{test})
  expect(page).to match(/another empty stylesheet/)
end
# Runs the same input through the HTML and the Word converter and checks
# that the literal "{% elif %}" text is still present in both outputs.
it "ignores Liquid markup in the document body" do
  FileUtils.rm_f(%w[test.doc test.html])
  input = <<~INPUT
    test
    {% elif %}These results are based on a study carried out on three different types of kernel.
  INPUT

  # Both conversions run before any assertion is made.
  IsoDoc::HtmlConvert.new(wordstylesheet: "spec/assets/word.css")
    .convert("test", input, false)
  IsoDoc::WordConvert.new({ wordstylesheet: "spec/assets/word.css" })
    .convert("test", input, false)

  %w[test.html test.doc].each do |path|
    expect(File.exist?(path)).to be true
    expect(File.read(path)).to match(%r/\{% elif %}/)
  end
end
# HTML conversion given only a Word stylesheet: the output must exist, must
# carry the MathJax reference and delimiters, and must not contain the
# stylesheet/override markers asserted below.
it "generates HTML output docs with null configuration" do
  FileUtils.rm_f(%w[test.doc test.html])
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
  INPUT
  IsoDoc::HtmlConvert.new({ wordstylesheet: "spec/assets/word.css" })
    .convert("test", source, false)

  expect(File.exist?("test.html")).to be true
  page = File.read("test.html")
  expect(page).not_to match(%r{test})
  expect(page).not_to match(/another empty stylesheet/)
  expect(page).to match(%r{cdnjs\.cloudflare\.com/ajax/libs/mathjax/})
  expect(page).to match(/delimiters: \[\['\(#\(', '\)#\)'\]\]/)
  expect(page).not_to match(/html-override/)
end
# Word conversion with only the two stylesheets configured; checks the
# markers expected in test.doc and the absence of "word-override".
it "generates Word output docs with null configuration" do
  FileUtils.rm_f(%w[test.doc test.html])
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
  INPUT
  converter = IsoDoc::WordConvert.new(
    { wordstylesheet: "spec/assets/word.css",
      htmlstylesheet: "spec/assets/html.scss" },
  )
  converter.convert("test", source, false)

  expect(File.exist?("test.doc")).to be true
  document = File.read("test.doc")
  expect(document).to match(/one empty stylesheet/)
  expect(document).to match(/div\.table_container/)
  expect(document).not_to match(/word-override/)
end
# Converts spec/assets/iso.xml from disk (no inline input string) and checks
# the stylesheet, Font Awesome, jQuery and toggle-script markers in the
# resulting iso.html, plus the absence of CDATA.
it "generates HTML output docs with null configuration from file" do
  FileUtils.rm_f(%w[spec/assets/iso.doc spec/assets/iso.html])
  converter = IsoDoc::HtmlConvert.new(
    { wordstylesheet: "word.css",
      htmlstylesheet: "html.scss" },
  )
  converter.convert("spec/assets/iso.xml", nil, false)

  expect(File.exist?("spec/assets/iso.html")).to be true
  page = File.read("spec/assets/iso.html")
  expect(page).to match(/another empty stylesheet/)
  expect(page).to match(%r{https://use.fontawesome.com})
  expect(page).to match(%r{libs/jquery})
  expect(page).to include "$('#toggle')"
  expect(page).not_to match(/CDATA/)
end
it "generates Headless HTML output docs with null configuration from file" do
FileUtils.rm_f "spec/assets/iso.html"
IsoDoc::HeadlessHtmlConvert.new(
{ wordstylesheet: "word.css",
htmlstylesheet: "html.scss" },
).convert("spec/assets/iso.xml", nil, false)
expect(File.exist?("spec/assets/iso.headless.html")).to be true
html = File.read("spec/assets/iso.headless.html")
expect(html).not_to match(/another empty stylesheet/)
expect(html).not_to match(%r{https://use.fontawesome.com})
expect(html).not_to match(%r{libs/jquery})
expect(html).not_to match(%r{
These results are based on a study carried out on three different types of kernel.
INPUT
html = File.read("test.html")
expect(html).to match(/another empty stylesheet/)
expect(html).to match(/p \{[^}]*?font-family: Zapf/m)
expect(html).to match(/code \{[^}]*?font-family: Consolas/m)
expect(html).to match(/h1 \{[^}]*?font-family: Comic Sans/m)
expect(html).to match(/p \{[^}]*?font-size: 30pt/m)
expect(html).to match(/code \{[^}]*?font-size: 29pt/m)
expect(html).to match(/p\.note \{[^}]*?font-size: 28pt/m)
expect(html).to match(/aside \{[^}]*?font-size: 27pt/m)
expect(html).to match(/an empty html cover page/)
expect(html).to match(/an empty html intro page/)
expect(html).to match(/This is > a script/)
expect(html).to match(/This is > also a script/)
expect(html).not_to match(/CDATA/)
expect(html).to match(%r{Antaŭparolo})
expect(html).to match(%r{html-override})
end
# HTML conversion with cover page, intro page, scripts, i18n file and list
# styles configured but no font options; compares the <body> of test.html
# (with <script> elements removed) against the expected text.
it "generates HTML output docs with default fonts" do
  FileUtils.rm_f(%w[test.doc test.html])
  settings = {
    htmlstylesheet: "spec/assets/html.scss",
    htmlcoverpage: "spec/assets/htmlcover.html",
    htmlintropage: "spec/assets/htmlintro.html",
    scripts: "spec/assets/scripts.html",
    i18nyaml: "spec/assets/i18n.yaml",
    ulstyle: "l1",
    olstyle: "l2",
  }
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
    These results are based on a study carried out on three different types of kernel.
  INPUT
  IsoDoc::HtmlConvert.new(settings).convert("test", source, false)

  body = Nokogiri::XML(File.read("test.html")).at("//body")
  body.xpath("//script").each(&:remove)
  expected = <<~OUTPUT
    /* an empty html cover page */
    /* an empty html intro page */
    Antaŭparolo
    These results are based on a study carried out on three different types of kernel.
  OUTPUT
  expect(body.to_xml).to be_equivalent_to expected
end
# Word conversion example for definition lists.
# NOTE(review): the `<<~"INPUT"` heredoc opened on the convert call has no
# INPUT terminator before this example's `end`, and the bare OUTPUT line has
# no visible opener. The fixture and the assertions are missing from this
# view — restore them before relying on this example.
it "converts definition lists to tables for Word" do
FileUtils.rm_f "test.doc"
FileUtils.rm_f "test.html"
IsoDoc::WordConvert.new(
{ wordstylesheet: "spec/assets/word.css",
htmlstylesheet: "spec/assets/html.scss" },
).convert("test", <<~"INPUT", false)
OUTPUT
end
# Word conversion example for terms-and-definitions content.
# NOTE(review): the `<<~"INPUT"` heredoc is never terminated with INPUT; the
# text runs straight into an OUTPUT line with no visible opener, and no
# assertion is visible. Markup appears to have been stripped — restore the
# fixture and expectation before relying on this example.
it "populates Word template with terms reference labels" do
FileUtils.rm_f "test.doc"
FileUtils.rm_f "test.html"
IsoDoc::WordConvert.new(
{ wordstylesheet: "spec/assets/word.css",
htmlstylesheet: "spec/assets/html.scss" },
).convert("test", <<~"INPUT", false)
1.Terms and Definitions1.1.paddy
rice retaining its husk after threshing
ISO 7301:2011, Clause 3.1
The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here
OUTPUT
end
# Word conversion with a Word intro page and `doctoclevels: 3`, reading the
# result back from test.doc.
# NOTE(review): the `.sub(/^.*` call after File.read is cut off mid-regex and
# is followed by an OUTPUT line with no visible heredoc opener; the
# expectation is missing from this view — restore it before relying on this
# example.
it "populates Word ToC with custom levels" do
FileUtils.rm_f "test.doc"
IsoDoc::WordConvert.new(
{ wordstylesheet: "spec/assets/word.css",
htmlstylesheet: "spec/assets/html.scss",
wordintropage: "spec/assets/wordintro.html",
doctoclevels: 3 },
).convert("test", <<~"INPUT", false)
Clause 4Introduction to this
Formerly denoted as 15 % (m/m).
Clause 4.2
A
Formerly denoted as 15 % (m/m).
Clause 4.2.1
INPUT
word = File.read("test.doc")
.sub(/^.*
OUTPUT
end
# HTML conversion with `htmltoclevels: 3`; the generated page must contain
# the toclevel() JavaScript function selecting h1, h2 and h3.
it "generates HTML output with custom ToC levels function" do
  FileUtils.rm_f(%w[test.doc test.html])
  source = <<~"INPUT"
    These results are based on a study carried out on three different types of kernel.
  INPUT
  IsoDoc::HtmlConvert.new({ htmltoclevels: 3 }).convert("test", source, false)

  page = File.read("test.html")
  expected_function = <<~"TOCLEVEL"
    function toclevel() { return "h1:not(:empty):not(.TermNum):not(.noTOC),h2:not(:empty):not(.TermNum):not(.noTOC),h3:not(:empty):not(.TermNum):not(.noTOC)";}
  TOCLEVEL
  expect(page).to include expected_function
end
# HTML conversion example for footnote renumbering.
# NOTE(review): the `<<~"INPUT"` heredoc is never terminated with INPUT; the
# text runs into an OUTPUT line with no visible opener, and no assertion is
# visible. Markup appears to have been stripped — restore the fixture and
# expectation before relying on this example.
it "reorders footnote numbers in HTML" do
FileUtils.rm_f "test.html"
IsoDoc::HtmlConvert.new(
{ wordstylesheet: "spec/assets/word.css",
htmlstylesheet: "spec/assets/html.scss",
wordintropage: "spec/assets/wordintro.html" },
).convert("test", <<~"INPUT", false)
Clause 4
This is a footnote.
Introduction to this
Formerly denoted as 15 % (m/m).
Clause 4.2
OUTPUT
end
# Converts to test.html, expects a .png to be listed in test_htmlimages, and
# compares the page (image file names masked to "_.png") with the expected
# output.
# NOTE(review): the INPUT fixture is empty here and the `sub` patterns and
# expected output carry no markup — presumably stripped; confirm against the
# real fixture.
it "moves images in HTML" do
  FileUtils.rm_f "test.html"
  FileUtils.rm_rf "test_htmlimages"
  source = <<~"INPUT"
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    { wordstylesheet: "spec/assets/word.css",
      htmlstylesheet: "spec/assets/html.scss" },
  )
  converter.convert("test", source, false)

  trimmed = File.read("test.html").sub(/^.*/m, '').sub(%r{.*$}m, "")
  expect(`ls test_htmlimages`).to match(/\.png$/)

  masked = trimmed.gsub(/\/[0-9a-f-]+\.png/, "/_.png")
  expected = <<~"OUTPUT"
    Foreword
    Split-it-right sample divider
  OUTPUT
  expect(xmlpp(masked)).to be_equivalent_to xmlpp(expected)
end
# Image relocation with `baseassetpath: "spec/assets"` and stylesheets named
# without a directory. The description names the option under test so that it
# is unique within this example group (it was a verbatim duplicate of another
# example's description, which makes failures ambiguous in reports).
# NOTE(review): the INPUT fixture is empty here and the `sub` patterns and
# expected output carry no markup — presumably stripped; confirm against the
# real fixture.
it "moves images in HTML with baseassetpath" do
  FileUtils.rm_f "test.html"
  FileUtils.rm_rf "test_htmlimages"
  source = <<~"INPUT"
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    { baseassetpath: "spec/assets",
      wordstylesheet: "word.css",
      htmlstylesheet: "html.scss" },
  )
  converter.convert("test", source, false)

  trimmed = File.read("test.html").sub(/^.*/m, '').sub(%r{.*$}m, "")
  expect(`ls test_htmlimages`).to match(/\.png$/)

  # Image file names are generated, so mask them before comparing.
  masked = trimmed.gsub(/\/[0-9a-f-]+\.png/, "/_.png")
  expected = <<~"OUTPUT"
    Foreword
    Split-it-right sample divider
  OUTPUT
  expect(xmlpp(masked)).to be_equivalent_to xmlpp(expected)
end
# Examples for the handling of `mathvariant` content in HTML output.
# NOTE(review): the input and expected fixtures contain only "Clause" here —
# presumably the MathML markup was stripped; confirm against the real fixture.
describe "mathvariant to plain" do
  # Clears previous output, converts +source+ with a default-configured
  # HtmlConvert, and returns test.html after the two `sub` passes that both
  # examples apply.
  def converted_fragment(source)
    FileUtils.rm_f "test.html"
    FileUtils.rm_rf "test_htmlimages"
    IsoDoc::HtmlConvert.new({}).convert("test", source, false)
    File.read("test.html").sub(/^.*/m, '').sub(%r{.*$}m, "")
  end

  context "when `mathvariant` attr equal to `script`" do
    it "converts mathvariant text chars into associated plain chars" do
      source = <<~INPUT
        Clause
      INPUT
      expected = <<~OUTPUT
        Clause
      OUTPUT
      expect(converted_fragment(source)).to(be_equivalent_to(expected))
    end
  end

  context "when complex `mathvariant` combinations" do
    it "converts mathvariant text chars into associated plain chars" do
      source = <<~INPUT
        Clause
      INPUT
      expected = <<~OUTPUT
        Clause
      OUTPUT
      expect(converted_fragment(source)).to(be_equivalent_to(expected))
    end
  end
end
# Converts to test.html, expects a .png to be listed in test_htmlimages, and
# compares the page (image file names masked to "_.png") with the expected
# output.
# NOTE(review): the INPUT fixture is empty here, so the "no file suffix"
# image reference is not visible — presumably stripped; confirm against the
# real fixture.
it "moves images in HTML with no file suffix" do
  FileUtils.rm_f "test.html"
  FileUtils.rm_rf "test_htmlimages"
  source = <<~"INPUT"
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    { wordstylesheet: "spec/assets/word.css",
      htmlstylesheet: "spec/assets/html.scss" },
  )
  converter.convert("test", source, false)

  trimmed = File.read("test.html").sub(/^.*/m, '').sub(%r{.*$}m, "")
  expect(`ls test_htmlimages`).to match(/\.png$/)

  masked = trimmed.gsub(/\/[0-9a-f-]+\.png/, "/_.png")
  expected = <<~"OUTPUT"
    Foreword
    Split-it-right sample divider
  OUTPUT
  expect(xmlpp(masked)).to be_equivalent_to xmlpp(expected)
end
# Converts to spec/test.html with stylesheets given relative to the output
# location, and checks that an image was moved next to that output.
# NOTE(review): the INPUT fixture is empty here and the `sub` patterns and
# expected output carry no markup — presumably stripped; confirm against the
# real fixture.
it "moves images in HTML, using relative file location" do
  FileUtils.rm_f "spec/test.html"
  FileUtils.rm_rf "spec/test_htmlimages"
  source = <<~"INPUT"
  INPUT
  IsoDoc::HtmlConvert
    .new(wordstylesheet: "assets/word.css",
         htmlstylesheet: "assets/html.scss")
    .convert("spec/test", source, false)

  trimmed = File.read("spec/test.html").sub(/^.*/m, '').sub(%r{.*$}m, "")
  # List the directory this example cleaned and wrote to (spec/...), not a
  # same-named directory in the working directory that another example may
  # have left behind.
  expect(`ls spec/test_htmlimages`).to match(/\.png$/)

  # Image file names are generated, so mask them before comparing.
  masked = trimmed.gsub(/\/[0-9a-f-]+\.png/, "/_.png")
  expected = <<~"OUTPUT"
    Foreword
    Split-it-right sample divider
  OUTPUT
  expect(xmlpp(masked)).to be_equivalent_to xmlpp(expected)
end
# HTML conversion with `datauriimage: true`; base64 PNG payloads in `src`
# attributes are masked to "_" before the page is compared with the expected
# output.
# NOTE(review): the INPUT fixture is empty here and the expected output has
# no markup — presumably stripped; confirm against the real fixture.
it "encodes images in HTML as data URIs" do
  FileUtils.rm_f "test.html"
  FileUtils.rm_rf "test_htmlimages"
  source = <<~"INPUT"
  INPUT
  IsoDoc::HtmlConvert
    .new(htmlstylesheet: "spec/assets/html.scss", datauriimage: true)
    .convert("test", source, false)

  trimmed = File.read("test.html").sub(/^.*/m, '').sub(%r{.*$}m, "")
  masked = trimmed
    .gsub(%r{src="data:image/png;base64,[^"]+"}, %{src="data:image/png;base64,_"})
  expected = <<~"OUTPUT"
    Foreword
    Split-it-right sample divider
  OUTPUT
  expect(xmlpp(masked)).to be_equivalent_to xmlpp(expected)
end
# Data-URI image encoding when converting to spec/test.html with the
# stylesheet given relative to that location; base64 payloads are masked
# before comparison.
# NOTE(review): the INPUT fixture is empty here and the expected output has
# no markup — presumably stripped; confirm against the real fixture.
it "encodes images in HTML as data URIs, using relative file location" do
  FileUtils.rm_f "spec/test.html"
  FileUtils.rm_rf "spec/test_htmlimages"
  source = <<~"INPUT"
  INPUT
  IsoDoc::HtmlConvert
    .new({ htmlstylesheet: "assets/html.scss", datauriimage: true })
    .convert("spec/test", source, false)

  trimmed = File.read("spec/test.html").sub(/^.*/m, '').sub(%r{.*$}m, "")
  masked = trimmed
    .gsub(%r{src="data:image/png;base64,[^"]+"}, %{src="data:image/png;base64,_"})
  expected = <<~"OUTPUT"
    Foreword
    Split-it-right sample divider
  OUTPUT
  expect(xmlpp(masked)).to be_equivalent_to xmlpp(expected)
end
# HTML conversion of a terms-and-definitions document using the shared
# `options`; the page must contain the "1.1." and "1.2." term numbers, each
# on a line of its own.
it "processes IsoXML terms for HTML" do
  FileUtils.rm_f(%w[test.html test.doc])
  source = <<~"INPUT"
    Terms and Definitions1.1.paddyrice
    rice retaining its husk after threshing
    Foreign seeds, husks, bran, sand, dust.
    A
    A
    3.1
    The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here
    1.2.paddypaddy ricerough ricecargo rice
    rice retaining its husk after threshing
    A
    The starch of waxy rice consists almost entirely of amylopectin. The kernels have a tendency to stick together after cooking.
    A
    The starch of waxy rice consists almost entirely of amylopectin. The kernels have a tendency to stick together after cooking.
    3.1
  INPUT
  IsoDoc::HtmlConvert.new(options).convert("test", source, false)

  expect(File.exist?("test.html")).to be true
  page = File.read("test.html")
  # The bodies of these patterns stay flush-left: leading whitespace would
  # become part of the (non-extended) regexes.
  expect(page).to match(%r{
1\.1\.
})
  expect(page).to match(%r{
1\.2\.
})
end
# HTML conversion of a term whose source carries a modification; the page
# must render the source as "[SOURCE: …, modified]".
it "processes empty term modifications" do
  FileUtils.rm_f(%w[test.html test.doc])
  source = <<~"INPUT"
    Terms and Definitionspaddyrice
    rice retaining its husk after threshing
    Foreign seeds, husks, bran, sand, dust.
    A
    A
    ISO 7301:2011, Clause 3.1
  INPUT
  IsoDoc::HtmlConvert.new(options).convert("test", source, false)

  expect(File.exist?("test.html")).to be true
  page = File.read("test.html")
  expect(page).to include '[SOURCE: ISO 7301:2011, Clause 3.1, modified]'
end
# Word conversion example for multi-paragraph list items, using the shared
# `options`.
# NOTE(review): the `<<~"INPUT"` heredoc opened on the convert call has no
# INPUT terminator before this example's `end`, and the bare OUTPUT line has
# no visible opener. The fixture and the assertions are missing from this
# view — restore them before relying on this example.
it "creates continuation styles for multiparagraph list items in Word" do
FileUtils.rm_f "test.doc"
FileUtils.rm_f "test.html"
IsoDoc::WordConvert.new(options)
.convert("test", <<~"INPUT", false)
OUTPUT
end
# HTML conversion of text containing "<xml> &", compared (after the two
# `sub` passes) against the expected output.
# NOTE(review): the expected OUTPUT heredoc is empty here and the input looks
# entity-decoded — presumably the markup was stripped; confirm against the
# real fixture.
it "does not lose HTML escapes in postprocessing" do
  FileUtils.rm_f(%w[test.doc test.html])
  input = <<~INPUT
    XML code
    <xml> &
  INPUT
  IsoDoc::HtmlConvert.new(options).convert("test", input, false)

  trimmed = File.read("test.html").sub(/^.*/m, '').sub(%r{.*$}m, "")
  expected = <<~"OUTPUT"
  OUTPUT
  expect(xmlpp(trimmed)).to be_equivalent_to xmlpp(expected)
end
# Word conversion example for example-styled paragraphs, using the shared
# `options`.
# NOTE(review): the `<<~"INPUT"` heredoc opened on the convert call has no
# INPUT terminator before this example's `end`, and the bare OUTPUT line has
# no visible opener. The fixture and the assertions are missing from this
# view — restore them before relying on this example.
it "propagates example style to paragraphs in postprocessing (Word)" do
FileUtils.rm_f "test.doc"
FileUtils.rm_f "test.html"
IsoDoc::WordConvert.new(options).convert("test", <<~"INPUT", false)
OUTPUT
end
# Word conversion example for image captions, using the shared `options` and
# reading the result back from test.doc.
# NOTE(review): the INPUT fixture is empty, the `.sub(/^.*` call is cut off
# mid-regex, and the OUTPUT line has no visible heredoc opener; the
# expectation is missing from this view — restore it before relying on this
# example.
it "deals with image captions (Word)" do
FileUtils.rm_f "test.doc"
FileUtils.rm_f "test.html"
IsoDoc::WordConvert.new(options)
.convert("test", <<~"INPUT", false)
INPUT
word = File.read("test.doc")
.sub(/^.*
OUTPUT
end
# Word conversion example for tables with empty titles, using the shared
# `options`.
# NOTE(review): the `<<~"INPUT"` heredoc opened on the convert call has no
# INPUT terminator before this example's `end`, and the bare OUTPUT line has
# no visible opener. The fixture and the assertions are missing from this
# view — restore them before relying on this example.
it "deals with empty table titles (Word)" do
FileUtils.rm_f "test.doc"
FileUtils.rm_f "test.html"
IsoDoc::WordConvert.new(options)
.convert("test", <<~"INPUT", false)
OUTPUT
end
# Word conversion using the shared `options`; checks that test.doc exists,
# that it declares the WordSection2_*/WordSection3_* page rules listed below,
# and then compares a trimmed copy of the document with the expected output.
# NOTE(review): the `sub` pattern in the final expectation (`/^.*.*$}m`) is
# not a well-formed regex literal as shown, and the fixtures carry no markup —
# presumably stripped; restore before relying on this example. The
# description mentions table-cell alignment, which is not visible in the
# assertions here — confirm against the real fixture.
it "propagates alignment of table cells (Word)" do
FileUtils.rm_f "test.doc"
FileUtils.rm_f "test.html"
IsoDoc::WordConvert.new(options)
.convert("test", <<~"INPUT", false)
For further information on the Foreword, see ISO/IEC Directives, Part 2, 2016, Clause 12.
A
B
C
D
B
And up
Annex 1
INPUT
expect(File.exist?("test.doc")).to be true
html = File.read("test.doc", encoding: "UTF-8")
expect(html).to include "div.WordSection2_0 {page:WordSection2P;}"
expect(html).to include "div.WordSection2_1 {page:WordSection2L;}"
expect(html).to include "div.WordSection3_0 {page:WordSection3P;}"
expect(html).to include "div.WordSection3_1 {page:WordSection3P;}"
expect(html).to include "div.WordSection3_2 {page:WordSection3L;}"
expect(xmlpp(html.sub(/^.*.*$}m, ""))).to be_equivalent_to xmlpp(<<~"OUTPUT")
Preface 1
This is a
paragraph
A
B
Preface 1.1
On my side
Upright again
Preface 1.3
And still upright
Document title
Foreword
For further information on the Foreword, see
ISO/IEC Directives, Part 2, 2016, Clause 12.
A
B
C
D
B
And up
Annex 1
OUTPUT
end
# Calls WordConvert#word_cleanup directly on a parsed input document (no
# files are written by this example) and compares the serialized result with
# the expected output.
# NOTE(review): the `.sub(/^.*.*$}m, "")` call in the expectation is not a
# well-formed regex literal as shown, and the fixtures carry no markup —
# presumably stripped; restore before relying on this example.
it "expands out nested tables in Word" do
input = <<~INPUT
Requirement 1:
requirement label
Requirement 1-1:
Description text
1.2.Second sample
Requirement 2:
requirement label
Requirement 2-1:
Description text
Requirement 2-2:
Description text
INPUT
output = <<~OUTPUT
Requirement 1:
requirement label
Requirement 1-1:
Description text
1.2.
Second sample
Requirement 2:
requirement label
Requirement 2-1:
Description text
Requirement 2-2:
Description text
OUTPUT
expect(xmlpp(IsoDoc::WordConvert
.new(wordstylesheet: "spec/assets/word.css",
htmlstylesheet: "spec/assets/html.scss", filename: "test")
.word_cleanup(Nokogiri::XML(input)).to_xml)
.sub(/^.*.*$}m, ""))
.to be_equivalent_to xmlpp(output)
end
# Calls WordConvert#word_cleanup directly on a parsed input document and
# compares the serialized result with `output`.
# NOTE(review): the `input` heredoc is never terminated with INPUT (it runs
# into a bare OUTPUT line), `output` is never assigned in the visible code,
# and the `.sub(/^.*.*$}m, "")` call is not a well-formed regex literal.
# Markup appears to have been stripped — restore the fixtures before relying
# on this example.
it "allocate widths to tables (Word)" do
input = <<~INPUT
Foreword
Table 1 — Repeatability and reproducibility of
husked
rice yield
1
OUTPUT
expect(xmlpp(IsoDoc::WordConvert
.new(wordstylesheet: "spec/assets/word.css",
htmlstylesheet: "spec/assets/html.scss", filename: "test")
.word_cleanup(Nokogiri::XML(input)).to_xml)
.sub(/^.*.*$}m, ""))
.to be_equivalent_to xmlpp(output)
end
# HTML conversion with `bare: true`; the written page is trimmed by three
# substitutions and compared with the expected output.
# NOTE(review): the substitution patterns (one of them empty) and the
# fixtures carry no markup here — presumably stripped; confirm against the
# real fixture.
it "generates bare HTML file" do
  FileUtils.rm_f "test.html"
  source = <<~"INPUT"
    testI am boilerplate
    These results are based on a study carried out on three different types of kernel.
  INPUT
  converter = IsoDoc::HtmlConvert.new(
    { bare: true,
      htmlstylesheet: "spec/assets/html.scss",
      filename: "test" },
  )
  converter.convert("test", source, false)

  expected = <<~OUTPUT
    Foreword
    These results are based on a study carried out on three
    different types of kernel.
    test
  OUTPUT
  expect(File.exist?("test.html")).to be true
  trimmed = File.read("test.html")
    .sub(%r{^.*.*$}m, "")
    .gsub(%r{}m, "")
    .sub(%r{(\s+)+}m, "")
  expect(xmlpp(trimmed)).to be_equivalent_to xmlpp(expected)
end
# Calls HtmlConvert#html_cleanup directly on a parsed input document (no
# files are written by this example) and compares the serialized result with
# the expected output.
# NOTE(review): the `.sub(/^.*.*$}m, "")` call in the expectation is not a
# well-formed regex literal as shown, and the fixtures carry no markup —
# presumably stripped; restore before relying on this example.
it "cleans up lists (HTML)" do
input = <<~INPUT
N1
A
B
N2
C
N3
INPUT
output = <<~OUTPUT
A
N1
B
N2
C
N3
OUTPUT
expect(xmlpp(IsoDoc::HtmlConvert
.new(htmlstylesheet: "spec/assets/html.scss", filename: "test")
.html_cleanup(Nokogiri::XML(input)).to_xml)
.sub(/^.*.*$}m, ""))
.to be_equivalent_to xmlpp(output)
end
end