require "test_helper"
require "govspeak_test_helper"
require "ostruct"
class GovspeakTest < Minitest::Test
include GovspeakTestHelper
# Smoke test for the instance API: build a document from a one-line emphasised
# snippet and compare the complete rendered string.
test "simple smoke-test" do
  expected_html = "
this is markdown
\n"
  document = Govspeak::Document.new("*this is markdown*")
  assert_equal expected_html, document.to_html
end
# Same smoke test, but through the class-level Govspeak::Document.to_html call.
test "simple smoke-test for simplified API" do
  expected_html = "this is markdown
\n"
  assert_equal expected_html, Govspeak::Document.to_html("*this is markdown*")
end
# The source ends with five escaped code points; the expected output contains
# none of them.
test "strips forbidden unicode characters" do
  source = "this is text with forbidden characters \u0008\u000b\ufffe\u{2ffff}\u{5fffe}"
  expected_html = "this is text with forbidden characters
\n"
  assert_equal expected_html, Govspeak::Document.new(source).to_html
end
# Renders a `{::highlight-answer}…{:/highlight-answer}` block that follows a
# plain line and pins the full output string.
# NOTE(review): the expected string contains no markup and none of the block's
# text — confirm it still matches the renderer's real output.
test "highlight-answer block extension" do
  source = "this \n{::highlight-answer}Lead in to *BIG TEXT*\n{:/highlight-answer}"
  expected_html = %(this
\n\n\n)
  assert_equal expected_html, Govspeak::Document.new(source).to_html
end
# Renders a `{stat-headline}…{/stat-headline}` block that follows a plain line
# and pins the full output string.
test "stat-headline block extension" do
  source = "this \n{stat-headline}*13.8bn* Age of the universe in years{/stat-headline}"
  expected_html = %(this
\n\n\n
13.8bn Age of the universe in years
\n
\n)
  assert_equal expected_html, Govspeak::Document.new(source).to_html
end
# Document#headers is expected to list headings in document order, each as a
# Govspeak::Header built from (text, level, id).
test "extracts headers with text, level and generated id" do
  markdown = %(
# Big title
### Small subtitle
## Medium title
)
  expected_headers = [
    ["Big title", 1, "big-title"],
    ["Small subtitle", 3, "small-subtitle"],
    ["Medium title", 2, "medium-title"],
  ].map { |text, level, id| Govspeak::Header.new(text, level, id) }
  assert_equal expected_headers, Govspeak::Document.new(markdown).headers
end
# Two identical level-2 headings: the second expected id has a "-1" suffix.
test "extracts different ids for duplicate headers" do
  markdown = "## Duplicate header\n\n## Duplicate header"
  expected_headers = %w[duplicate-header duplicate-header-1].map do |id|
    Govspeak::Header.new("Duplicate header", 2, id)
  end
  assert_equal expected_headers, Govspeak::Document.new(markdown).headers
end
# Four headings at levels 1, 2, 3, 3 are expected back in document order.
# NOTE(review): the test name mentions nesting inside blocks, but the source
# literal here shows only plain headings — confirm the intended input.
test "extracts headers when nested inside blocks" do
  markdown = %(
# First title
## Nested subtitle
### Double nested subtitle
### Second double subtitle
)
  expected_headers = [
    ["First title", 1, "first-title"],
    ["Nested subtitle", 2, "nested-subtitle"],
    ["Double nested subtitle", 3, "double-nested-subtitle"],
    ["Second double subtitle", 3, "second-double-subtitle"],
  ].map { |text, level, id| Govspeak::Header.new(text, level, id) }
  assert_equal expected_headers, Govspeak::Document.new(markdown).headers
end
# A heading carrying `{#special}` is expected to report "special" as its id,
# while the heading without one gets an id derived from its text.
test "extracts headers with explicitly specified ids" do
  markdown = %(
# First title
## Second title {#special}
)
  expected_headers = [
    Govspeak::Header.new("First title", 1, "first-title"),
    Govspeak::Header.new("Second title", 2, "special"),
  ]
  assert_equal expected_headers, Govspeak::Document.new(markdown).headers
end
# Document#to_text on a heading plus a paragraph: the expected result is one
# space-separated line with no trailing whitespace.
test "extracts text with no HTML and normalised spacing" do
  plain_text = Govspeak::Document.new("# foo\n\nbar baz ").to_text
  assert_equal "foo bar baz", plain_text
end
# The closing `$A` marker is followed by whitespace inside the source literal;
# the full rendered string is pinned.
test "trailing space after the address should not prevent parsing" do
  address_markup = %($A
123 Test Street
Testcase Cliffs
Teston
0123 456 7890 $A )
  expected_html = %(\n\n123 Test Street
Testcase Cliffs
Teston
0123 456 7890 \n
\n)
  assert_equal expected_html, Govspeak::Document.new(address_markup).to_html
end
# A one-column table followed by a `{barchart}` line; the complete rendered
# string is pinned.
test "should convert barchart" do
  table_markup = <<~GOVSPEAK
|col|
|---|
|val|
{barchart}
GOVSPEAK
  expected_html = %(\n \n \n col | \n
\n \n \n \n val | \n
\n \n
\n)
  assert_equal expected_html, Govspeak::Document.new(table_markup).to_html
end
# Same table as the plain barchart case, with the three extra words
# `stacked compact negative` inside the `{barchart …}` tag.
test "should convert barchart with stacked compact and negative" do
  table_markup = <<~GOVSPEAK
|col|
|---|
|val|
{barchart stacked compact negative}
GOVSPEAK
  expected_html = %(\n \n \n col | \n
\n \n \n \n val | \n
\n \n
\n)
  assert_equal expected_html, Govspeak::Document.new(table_markup).to_html
end
# An `$A … $A` address directly below a paragraph line. Without the separating
# line-breaks kramdown would treat the address div as paragraph text and
# escape its HTML.
test "address div is separated from paragraph text by a couple of line-breaks" do
  markup = %(Paragraph1
$A
123 Test Street
Testcase Cliffs
Teston
0123 456 7890 $A)
  expected_html = %(Paragraph1
\n\n\n123 Test Street
Testcase Cliffs
Teston
0123 456 7890 \n
\n)
  assert_equal expected_html, Govspeak::Document.new(markup).to_html
end
# A single line wrapped in `^ … ^`: the text rendering is expected to be the
# inner sentence without the markers.
# NOTE(review): the expected HTML literal holds only a newline — confirm it
# still matches the renderer's real output.
test_given_govspeak("^ I am very informational ^") do
assert_html_output %(
)
assert_text_output "I am very informational"
end
# Rendering must leave the caller's string untouched: the same object is
# compared against a fresh literal after to_html has run.
test "processing an extension does not modify the provided input" do
  markup = "^ I am very informational"
  Govspeak::Document.new(markup).to_html
  assert_equal "^ I am very informational", markup
end
# Callout markers `^`, `@` and `%`, with and without a closing marker and with
# or without preceding text. Each case pins the HTML and the plain-text output.
# NOTE(review): the expected HTML literals contain no markup — confirm they
# still match the renderer's real output.

# `^ … ^` on the line after a paragraph line.
test_given_govspeak "The following is very informational\n^ I am very informational ^" do
assert_html_output %(
The following is very informational
)
assert_text_output "The following is very informational I am very informational"
end
# `^` opener with no closing marker: text output is the same as the closed form.
test_given_govspeak "^ I am very informational" do
assert_html_output %(
)
assert_text_output "I am very informational"
end
# `@ … @` on its own.
test_given_govspeak "@ I am very important @" do
assert_html_output %(
)
assert_text_output "I am very important"
end
# `@ … @` on the line after a paragraph line, with surrounding newlines.
test_given_govspeak "
The following is very important
@ I am very important @
" do
assert_html_output %(
The following is very important
)
assert_text_output "The following is very important I am very important"
end
# `% … %` on its own.
test_given_govspeak "% I am very helpful %" do
assert_html_output %(
)
assert_text_output "I am very helpful"
end
# `% … %` on the line after a paragraph line.
test_given_govspeak "The following is very helpful\n% I am very helpful %" do
assert_html_output %(
The following is very helpful
)
assert_text_output "The following is very helpful I am very helpful"
end
# `% … %` between two headings, terminated by a "\r\n" line ending.
test_given_govspeak "## Hello ##\n\n% I am very helpful %\r\n### Young Workers ###\n\n" do
assert_html_output %(
Hello
Young Workers
)
assert_text_output "Hello I am very helpful Young Workers"
end
# `%` opener with no closing marker.
test_given_govspeak "% I am very helpful" do
assert_html_output %(
)
assert_text_output "I am very helpful"
end
# Link rendering cases: inline links, `x[…](…)x` wrapped links, links with
# `{:rel=…}` attributes, reference-style definitions and an image.
# NOTE(review): the expected HTML literals contain no anchor markup — confirm
# they still match the renderer's real output.

# Inline link inside a sentence; the straight apostrophe in the input is
# expected as a typographic one (’) in both outputs.
test_given_govspeak "This is a [link](http://www.gov.uk) isn't it?" do
assert_html_output 'This is a link isn’t it?
'
assert_text_output "This is a link isn’t it?"
end
# Link whose URL path contains `@` characters.
test_given_govspeak "This is a [link with an at sign in it](http://www.gov.uk/@dg/@this) isn't it?" do
assert_html_output 'This is a link with an at sign in it isn’t it?
'
assert_text_output "This is a link with an at sign in it isn’t it?"
end
# A word followed by a `*[HTML]:` definition line for it.
test_given_govspeak "
HTML
*[HTML]: Hyper Text Markup Language" do
assert_html_output %(HTML
)
assert_text_output "HTML"
end
# Link wrapped in `x…x`: the surrounding x characters are absent from both
# expected outputs.
test_given_govspeak "x[a link](http://rubyforge.org)x" do
assert_html_output 'a link
'
assert_text_output "a link"
end
# `x…x` wrapped link whose text itself contains "xx"; the link text is kept.
test_given_govspeak "x[an xx link](http://x.com)x" do
assert_html_output 'an xx link
'
end
# Absolute link to www.gov.uk.
test_given_govspeak "[internal link](http://www.gov.uk)" do
assert_html_output 'internal link
'
end
# Host-less link ("/").
test_given_govspeak "[link with no host is assumed to be internal](/)" do
assert_html_output 'link with no host is assumed to be internal
'
end
# www.gov.uk link with an explicit `{:rel='next'}` attribute.
test_given_govspeak "[internal link with rel attribute keeps it](http://www.gov.uk){:rel='next'}" do
assert_html_output 'internal link with rel attribute keeps it
'
end
# Link to another host, written without `x…x` markers.
test_given_govspeak "[external link without x markers](http://www.google.com)" do
assert_html_output 'external link without x markers
'
end
# Based on Kramdown inline attribute list (IAL) test:
# https://github.com/gettalong/kramdown/blob/627978525cf5ee5b290d8a1b8675aae9cc9e2934/test/testcases/span/01_link/link_defs_with_ial.text
test_given_govspeak "External link definitions with [attr] and [attr 2] and [attr 3] and [attr before]\n\n[attr]: http://example.com 'title'\n{: hreflang=\"en\" .test}\n\n[attr 2]: http://example.com 'title'\n{: hreflang=\"en\"}\n{: .test}\n\n[attr 3]: http://example.com\n{: .test}\ntest\n\n{: hreflang=\"en\"}\n{: .test}\n[attr before]: http://example.com" do
assert_html_output "External link definitions with attr and attr 2 and attr 3 and attr before
\n\ntest
"
end
# Single reference-style link whose definition is followed by an IAL line.
test_given_govspeak "External link with [inline attribute list] (IAL)\n\n[inline attribute list]: http://example.com 'title'\n{: hreflang=\"en\" .test}" do
assert_html_output 'External link with inline attribute list (IAL)
'
end
# Link to another host with an explicit `{:rel='next'}` attribute.
test_given_govspeak "[external link with rel attribute](http://www.google.com){:rel='next'}" do
assert_html_output 'external link with rel attribute
'
end
# Link to another host preceded by text.
test_given_govspeak "Text before [an external link](http://www.google.com)" do
assert_html_output 'Text before an external link
'
end
# Link to another host followed by text.
test_given_govspeak "[An external link](http://www.google.com) with text afterwards" do
assert_html_output 'An external link with text afterwards
'
end
# Link to another host with text on both sides.
test_given_govspeak "Text before [an external link](http://www.google.com) and text afterwards" do
assert_html_output 'Text before an external link and text afterwards
'
end
# Markdown image pointing at another host; the expected HTML literal is empty.
test_given_govspeak "![image with external url](http://www.example.com/image.jpg)" do
assert_html_output ''
end
# A host listed in `document_domains` must not be marked rel="external".
# refute_includes is used so a failure prints the rendered HTML alongside the
# custom message.
test "should be able to override default 'document_domains' option" do
  html = Govspeak::Document.new("[internal link](http://www.not-external.com)", document_domains: %w[www.not-external.com]).to_html
  refute_includes html, 'rel="external"', "should not consider www.not-external.com as an external url"
end
# With two hosts in `document_domains`, a link to the second one must not be
# marked rel="external". refute_includes is used so a failure prints the
# rendered HTML alongside the custom message.
test "should be able to supply multiple domains for 'document_domains' option" do
  html = Govspeak::Document.new("[internal link](http://www.not-external-either.com)", document_domains: %w[www.not-external.com www.not-external-either.com]).to_html
  refute_includes html, 'rel="external"', "should not consider www.not-external-either.com as an external url"
end
# Passing `input: "kramdown"` is expected to leave a link to another host
# without rel="external". refute_includes is used so a failure prints the
# rendered HTML alongside the custom message.
test "should be able to override default 'input' option" do
  html = Govspeak::Document.new("[external link](http://www.external.com)", input: "kramdown").to_html
  refute_includes html, 'rel="external"', "should not automatically add rel external attribute"
end
# Passing `entity_output: :numeric` must not change how entities are written.
# The probe is the escaped entity `&gt;`: a bare ">" closes every rendered tag,
# so searching for that character would succeed whatever the entity format.
# NOTE(review): confirm `&gt;` is the form the renderer emits by default.
test "should not be able to override default 'entity output' option" do
  html = Govspeak::Document.new("&gt;", entity_output: :numeric).to_html
  assert_includes html, "&gt;"
end
# A link target that is not a valid URI must not be marked rel="external".
# refute_includes is used so a failure prints the rendered HTML.
test "should assume a link with an invalid uri is internal" do
  html = Govspeak::Document.new("[link](:invalid-uri)").to_html
  refute_includes html, 'rel="external"'
end
# A mailto: link must not be marked rel="external"; the output is then passed
# through the deobfuscate_mailto helper and compared as a whole.
# refute_includes is used so a failure prints the rendered HTML.
test "should treat a mailto as internal" do
  html = Govspeak::Document.new("[link](mailto:a@b.com)").to_html
  refute_includes html, 'rel="external"'
  assert_equal %(link
\n), deobfuscate_mailto(html)
end
# A `mailto://` target (with the double slash) is rendered; the output is
# compared after passing through the deobfuscate_mailto helper.
test "permits mailto:// URI" do
  rendered = Govspeak::Document.new("[link](mailto://a@b.com)").to_html
  expected_html = %(link
\n)
  assert_equal expected_html, deobfuscate_mailto(rendered)
end
# A `mailto:` target with no address is rendered; the output is compared
# after passing through the deobfuscate_mailto helper.
test "permits dud mailto: URI" do
  rendered = Govspeak::Document.new("[link](mailto:)").to_html
  expected_html = %(link
\n)
  assert_equal expected_html, deobfuscate_mailto(rendered)
end
# Renders a link whose URL ends in an encoded space (%20). There is no
# assertion: the test passes as long as to_html does not raise.
test "permits trailing whitespace in an URI" do
Govspeak::Document.new("[link](http://example.com/%20)").to_html
end
# Regression test - the surrounded_by helper doesn't require the closing x
# so 'xaa' was getting picked up by the external link helper above
# TODO: review whether we should require closing symbols for these extensions
# need to check all existing content.
# The word must come through unchanged in both the HTML and the text output.
test_given_govspeak "xaa" do
assert_html_output "xaa
"
assert_text_output "xaa"
end
# Blocks delimited by `$`-prefixed markers. Each case pins the HTML and the
# plain-text output.
# NOTE(review): the expected HTML literals contain no markup — confirm they
# still match the renderer's real output.

# `$! … $!` on separate lines.
test_given_govspeak "
$!
rainbow
$!" do
assert_html_output %(
)
assert_text_output "rainbow"
end
# `$C … $C` on a single line.
test_given_govspeak "$C help, send cake $C" do
assert_html_output %(
)
assert_text_output "help, send cake"
end
# `$A … $A` holding two lines; the text output joins them with a space.
test_given_govspeak "
$A
street
road
$A" do
assert_html_output %(
)
assert_text_output "street road"
end
# `$I … $I` nested inside `$P … $P`.
test_given_govspeak "
$P
$I
help
$I
$P" do
assert_html_output %()
assert_text_output "help"
end
# `$D … $D`; the three dots in the input are expected as a single ellipsis
# character (…) in the text output.
test_given_govspeak "
$D
can you tell me how to get to...
$D" do
assert_html_output %(
)
assert_text_output "can you tell me how to get to…"
end
# `$CTA … $CTA` blocks containing plain text, step lists and links, alone or
# next to other content.
# NOTE(review): the expected HTML literals contain no markup — confirm they
# still match the renderer's real output.

# Plain text inside `$CTA`.
test_given_govspeak "
$CTA
Click here to start the tool
$CTA" do
assert_html_output %(
Click here to start the tool
)
assert_text_output "Click here to start the tool"
end
# A paragraph before the `$CTA` block.
test_given_govspeak "
Here is some text\n
$CTA
Click here to start the tool
$CTA
" do
assert_html_output %(
Here is some text
Click here to start the tool
)
end
# `s1.`–`s4.` step lines inside `$CTA`; each step is expected as a list item.
test_given_govspeak "
$CTA
This is a test:
s1. This is number 1.
s2. This is number 2.
s3. This is number 3.
s4. This is number 4.
$CTA" do
assert_html_output %(
This is a test:
-
This is number 1.
-
This is number 2.
-
This is number 3.
-
This is number 4.
)
end
# `$CTA` containing a link to another host.
test_given_govspeak "
$CTA
[external link](http://www.external.com) some text
$CTA
" do
assert_html_output %(
)
end
# `$CTA` containing a link to a host that is passed in `document_domains`.
test_given_govspeak "
$CTA
[internal link](http://www.not-external.com) some text
$CTA", document_domains: %w[www.not-external.com] do
assert_html_output %(
)
end
# `$CTA` block immediately followed by a `$C` block.
test_given_govspeak "
$CTA
Click here to start the tool
$CTA
$C
Here is some text
$C
" do
assert_html_output %(
Click here to start the tool
)
end
# A link paragraph (host listed in `document_domains`) before the `$CTA` block.
test_given_govspeak "
[internal link](http://www.not-external.com)\n
$CTA
Click here to start the tool
$CTA", document_domains: %w[www.not-external.com] do
assert_html_output %(
internal link
Click here to start the tool
)
end
# `$CTA … $CTA` blocks combined with footnote references (`[^n]`) and
# `*[TERM]:` definitions.
# NOTE(review): the expected HTML literals contain no markup and no footnote
# list — confirm they still match the renderer's real output.

# One footnote reference inside `$CTA`, defined after the block.
test_given_govspeak "
$CTA
Click here to start the tool[^1]
$CTA
[^1]: Footnote definition one
" do
assert_html_output %(
Click here to start the tool
)
end
# Two consecutive `$CTA` blocks, each holding one footnote reference.
test_given_govspeak "
$CTA
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Fusce felis ante[^1], lobortis non quam sit amet, tempus interdum justo.
$CTA
$CTA
Pellentesque quam enim, egestas sit amet congue sit amet[^2], ultrices vitae arcu.
Fringilla, metus dui scelerisque est.
$CTA
[^1]: Footnote definition one
[^2]: Footnote definition two
" do
assert_html_output %(
Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.
Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu.
Fringilla, metus dui scelerisque est.
)
end
# One footnote reference inside `$CTA` and one in the paragraph after it.
test_given_govspeak "
$CTA
Click here to start the tool[^1]
$CTA
Lorem ipsum dolor sit amet[^2]
[^1]: Footnote definition 1
[^2]: Footnote definition 2
" do
assert_html_output %(
Click here to start the tool
Lorem ipsum dolor sit amet
)
end
# `*[…]:` definitions for "class" and "SGD", both of which occur as words
# inside the `$CTA` text.
test_given_govspeak "
$CTA
Contact the SGD on 0800 000 0000 or contact the class on 0800 001 0001
$CTA
*[class]: Other Government Department
*[SGD]: Some Government Department
" do
assert_html_output %(
Contact the SGD on 0800 000 0000 or contact the class on 0800 001 0001
)
end
# The definition text for "website" itself contains the other defined term
# "GOV.UK".
test_given_govspeak "
$CTA
Welcome to the GOV.UK website
$CTA
*[GOV.UK]: The official UK government website
*[website]: A collection of web pages, such as GOV.UK
" do
assert_html_output %(
Welcome to the GOV.UK website
)
end
# A "GOV.UK" definition with `$CTA` text that does not contain that term.
test_given_govspeak "
$CTA
Please email
$CTA
*[GOV.UK]: The official UK government website
" do
assert_html_output %(
)
end
# Footnote whose text contains a defined term; the "GOV.UK" definition line
# appears twice in the input.
test_given_govspeak "
$CTA
Welcome to the GOV.UK[^1]
$CTA
[^1]: GOV.UK is the official UK government website
*[GOV.UK]: The official UK government website
*[website]: A collection of web pages, such as GOV.UK
*[GOV.UK]: The official UK government website
" do
assert_html_output %(
)
end
# `[Image:image-id]` embed inside a `$CTA` block, followed by a paragraph.
# The document is given one image from the build_image helper through the
# `images:` option.
# NOTE(review): the expected HTML literal contains no markup — confirm it
# still matches the renderer's real output.
test "CTA with image" do
given_govspeak "
$CTA
[Image:image-id]
$CTA
Some text
", images: [build_image] do
assert_html_output %(
Some text
)
end
end
# Ordered lists (`1.`) and step lists (`s1.`).
# NOTE(review): the expected HTML literals contain no list markup — confirm
# they still match the renderer's real output.

# Plain numbered list.
test_given_govspeak "
1. rod
2. jane
3. freddy" do
assert_html_output "\n - rod
\n - jane
\n - freddy
\n
"
assert_text_output "rod jane freddy"
end
# `s1.`–`s3.` step list.
test_given_govspeak "
s1. zippy
s2. bungle
s3. george
" do
assert_html_output %(
-
zippy
-
bungle
-
george
)
assert_text_output "zippy bungle george"
end
# Unordered list directly followed by a step list; the text output contains
# the items of both lists in order.
test_given_govspeak "
- unordered
- list
s1. step
s2. list
" do
assert_html_output %(
-
step
-
list
)
assert_text_output "unordered list step list"
end
# `$LegislativeList … $EndLegislativeList` blocks: the item labels written in
# the source ("1.0", "a)", "1.") are expected verbatim in the output.
# NOTE(review): the expected HTML literals contain no list markup — confirm
# they still match the renderer's real output.

# Items with continuation lines and lettered sub-items.
test_given_govspeak "
$LegislativeList
* 1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.
Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu.
fringilla, metus dui scelerisque est.
* a) A list item
* b) Another list item
* 1.1 Second entry
Curabitur pretium pharetra sapien, a feugiat arcu euismod eget.
Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus
$EndLegislativeList
" do
assert_html_output %{
-
1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit.
Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.
Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu.
fringilla, metus dui scelerisque est.
-
a) A list item
-
b) Another list item
-
1.1 Second entry
Curabitur pretium pharetra sapien, a feugiat arcu euismod eget.
Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus
}
end
# Single-line items mixing numbered and lettered labels.
test_given_govspeak "
$LegislativeList
* 1. The quick
* 2. Brown fox
* a) Jumps over
* b) The lazy
* 3. Dog
$EndLegislativeList
" do
assert_html_output %{
- 1. The quick
- 2. Brown fox
- a) Jumps over
- b) The lazy
- 3. Dog
}
end
# `$LegislativeList` blocks combined with footnote references (`[^n]`), links
# and `*[TERM]:` definitions.
# NOTE(review): the expected HTML literals contain no markup and no footnote
# list — confirm they still match the renderer's real output.

# Two footnote references inside one list.
test_given_govspeak "
$LegislativeList
* 1. Item 1[^1]
* 2. Item 2[^2]
* 3. Item 3
$EndLegislativeList
[^1]: Footnote definition one
[^2]: Footnote definition two
" do
assert_html_output %(
- 1. Item 1
- 2. Item 2
- 3. Item 3
)
end
# Footnote references spread over two lists and the paragraph between them.
test_given_govspeak "
$LegislativeList
* 1. Item 1[^1]
* 2. Item 2
* 3. Item 3
$EndLegislativeList
This is a paragraph with a footnote[^2].
$LegislativeList
* 1. Item 1
* 2. Item 2[^3]
* 3. Item 3
$EndLegislativeList
[^1]: Footnote definition one
[^2]: Footnote definition two
[^3]: Footnote definition two
" do
assert_html_output %(
- 1. Item 1
- 2. Item 2
- 3. Item 3
This is a paragraph with a footnote.
- 1. Item 1
- 2. Item 2
- 3. Item 3
)
end
# Twelve footnote references (two-digit labels included) across three lists
# and three paragraphs.
test_given_govspeak "
$LegislativeList
* 1. Item 1[^1]
* 2. Item 2[^2]
* 3. Item 3[^3]
$EndLegislativeList
This is a paragraph with a footnote[^4].
$LegislativeList
* 1. Item 1[^5]
* 2. Item 2[^6]
* 3. Item 3[^7]
$EndLegislativeList
This is a paragraph with a footnote[^8].
$LegislativeList
* 1. Item 1[^9]
* 2. Item 2[^10]
* 3. Item 3[^11]
$EndLegislativeList
This is a paragraph with a footnote[^12].
[^1]: Footnote definition 1
[^2]: Footnote definition 2
[^3]: Footnote definition 3
[^4]: Footnote definition 4
[^5]: Footnote definition 5
[^6]: Footnote definition 6
[^7]: Footnote definition 7
[^8]: Footnote definition 8
[^9]: Footnote definition 9
[^10]: Footnote definition 10
[^11]: Footnote definition 11
[^12]: Footnote definition 12
" do
assert_html_output %(
- 1. Item 1
- 2. Item 2
- 3. Item 3
This is a paragraph with a footnote.
- 1. Item 1
- 2. Item 2
- 3. Item 3
This is a paragraph with a footnote.
- 1. Item 1
- 2. Item 2
- 3. Item 3
This is a paragraph with a footnote.
)
end
# A list item holding both a footnote reference and a link, plus a footnote
# reference in the paragraph after the list.
test_given_govspeak "
$LegislativeList
* 1. Item 1[^1] with a [link](http://www.gov.uk)
* 2. Item 2
* 3. Item 3
$EndLegislativeList
This is a paragraph with a footnote[^2]
[^1]: Footnote definition one
[^2]: Footnote definition two
" do
assert_html_output %(
- 1. Item 1 with a link
- 2. Item 2
- 3. Item 3
This is a paragraph with a footnote
)
end
# Footnote definitions that themselves contain links (one to www.gov.uk, one
# to www.google.com).
test_given_govspeak "
$LegislativeList
* 1. Item 1[^1] with a [link](http://www.gov.uk)
* 2. Item 2
* 3. Item 3[^2]
$EndLegislativeList
[^1]: Footnote definition one with a [link](http://www.gov.uk) included
[^2]: Footnote definition two with an external [link](http://www.google.com)
" do
assert_html_output %(
- 1. Item 1 with a link
- 2. Item 2
- 3. Item 3
)
end
# Footnote reference immediately followed by a colon inside the list, which
# looks like the start of a footnote definition.
test_given_govspeak "
$LegislativeList
1. some text[^1]:
$EndLegislativeList
[^1]: footnote text
" do
assert_html_output %(
)
end
# Same as the previous case, with more text after the colon.
test_given_govspeak "
$LegislativeList
1. some text[^1]: extra
$EndLegislativeList
[^1]: footnote text
" do
assert_html_output %(
)
end
# Defined terms used in list items and footnote text; one defined term is the
# word "class".
test_given_govspeak "
$LegislativeList
* 1. Item 1[^1] with an ACRONYM
* 2. Item 2[^2]
* 3. Item 3[^3]
$EndLegislativeList
[^1]: Footnote definition one
[^2]: Footnote definition two with an ACRONYM
[^3]: Footnote definition three with an acronym that matches an HTML tag class
*[ACRONYM]: This is the acronym explanation
*[class]: Testing HTML matching
" do
assert_html_output %(
- 1. Item 1 with an ACRONYM
- 2. Item 2
- 3. Item 3
)
end
# `$LegislativeList` edge cases: missing end marker, preceding paragraph,
# "\r\n" line endings and `*[TERM]:` definitions.
# NOTE(review): the expected HTML literals contain no markup — confirm they
# still match the renderer's real output.

# No `$EndLegislativeList` marker: the expected output still contains the
# literal `$LegislativeList` line.
test_given_govspeak "
The quick brown
$LegislativeList
* 1. fox jumps over
" do
assert_html_output "
The quick brown
$LegislativeList
* 1. fox jumps over
"
end
# A paragraph line directly above a complete list block.
test_given_govspeak "
The quick brown fox
$LegislativeList
* 1. jumps over the lazy dog
$EndLegislativeList
" do
assert_html_output %(
The quick brown fox
- 1. jumps over the lazy dog
)
end
# Same shape using "\r\n" line endings.
test_given_govspeak "This bit of text\r\n\r\n$LegislativeList\r\n* 1. should be turned into a list\r\n$EndLegislativeList" do
assert_html_output %(
This bit of text
- 1. should be turned into a list
)
end
# The definition text for "website" itself contains the other defined term
# "GOV.UK".
test_given_govspeak "
$LegislativeList
Welcome to the GOV.UK website
$EndLegislativeList
*[GOV.UK]: The official UK government website
*[website]: A collection of web pages, such as GOV.UK
" do
assert_html_output %(
Welcome to the GOV.UK website
)
end
# A "GOV.UK" definition with list text that does not contain that term.
test_given_govspeak "
$LegislativeList
Please email
$EndLegislativeList
*[GOV.UK]: The official UK government website
" do
assert_html_output %(
)
end
# Footnote whose text contains a defined term; the "GOV.UK" definition line
# appears twice in the input.
test_given_govspeak "
$LegislativeList
Welcome to the GOV.UK[^1]
$EndLegislativeList
[^1]: GOV.UK is the official UK government website
*[GOV.UK]: The official UK government website
*[website]: A collection of web pages, such as GOV.UK
*[GOV.UK]: The official UK government website
" do
assert_html_output %(
)
end
# `[Image:image-id]` embed inside a `$LegislativeList` block, followed by a
# paragraph. The document is given one image from the build_image helper
# through the `images:` option.
# NOTE(review): the expected HTML literal contains no markup — confirm it
# still matches the renderer's real output.
test "LegislativeList with image" do
given_govspeak "
$LegislativeList
[Image:image-id]
$EndLegislativeList
Some text
", images: [build_image] do
assert_html_output %(
Some text
)
end
end
# NOTE(review): the expected HTML literals in these three cases contain no
# markup — confirm they still match the renderer's real output.

# "s428." occurs mid-sentence and resembles a step marker (`s1.`); the
# paragraph is expected back unchanged.
test_given_govspeak "
Zippy, Bungle and George did not qualify for the tax exemption in s428. They filled in their tax return accordingly.
" do
assert_html_output %(
Zippy, Bungle and George did not qualify for the tax exemption in s428. They filled in their tax return accordingly.
)
end
# `:scotland: … :scotland:` block spanning two lines.
test_given_govspeak ":scotland: I am very devolved\n and very scottish \n:scotland:" do
assert_html_output '
I am very devolved
and very scottish
'
end
# `@ … @` callout whose closing marker directly follows a link.
test_given_govspeak "@ Message with [a link](http://foo.bar/)@" do
assert_html_output %(
)
end
# Default options: the stripped output is expected to be empty.
# NOTE(review): the source literal is itself empty, so this only pins
# "empty in, empty out" — confirm whether markup was meant to be passed in.
test "sanitize source input by default" do
  rendered = Govspeak::Document.new("").to_html
  assert_equal "", rendered.strip
end
# With `sanitize: false`, the stripped output is compared to the expected
# literal.
# NOTE(review): both the source and the expected literal are empty, so the
# option's effect is not observable here — confirm the intended markup.
test "it can have sanitizing disabled" do
  rendered = Govspeak::Document.new("", sanitize: false).to_html
  assert_equal "", rendered.strip
end
# Passes `allowed_elements: %w[uncommon-element]` and compares the stripped
# output.
# NOTE(review): the source literal contains no <uncommon-element> tag, so the
# allow-list is not exercised as written — confirm the intended markup.
test "it can exclude stipulated elements from sanitization" do
  rendered = Govspeak::Document.new("some content", allowed_elements: %w[uncommon-element]).to_html
  assert_equal "some content", rendered.strip
end
# Document#valid? is expected to be falsy for the given source.
# NOTE(review): the source literal is empty and contains no HTML at all —
# confirm the intended malicious markup.
test "identifies a Govspeak document containing malicious HTML as invalid" do
  refute Govspeak::Document.new("").valid?
end
# Document#valid? is expected to be truthy for a short, newline-terminated
# source.
test "identifies a Govspeak document containing acceptable HTML as valid" do
  markup = "some content
"
  assert Govspeak::Document.new(markup).valid?
end
# The quoted line in the source is wrapped in straight double quotes; the
# expected output shows it without them and with a typographic apostrophe.
test "should remove quotes surrounding a blockquote" do
  quoted_markup = %(
He said:
> "I'm not sure what you mean!"
Or so we thought.)
  given_govspeak(quoted_markup) do
    assert_html_output %(
He said:
I’m not sure what you mean!
Or so we thought.
)
  end
end
# A blockquote with two paragraphs separated by an empty quoted line.
# NOTE(review): the expected literal shows no class attribute — confirm it
# still matches the renderer's real output.
test "should add class to last paragraph of blockquote" do
  quoted_markup = "
> first line
>
> last line"
  given_govspeak(quoted_markup) do
    assert_html_output %(
first line
last line
)
  end
end
end