# Minitest suite for Govspeak::Document. The tests below exercise HTML
# rendering (to_html), plain-text extraction (to_text), header extraction
# (headers) and validity checking (valid?), mostly through the
# test_given_govspeak / given_govspeak, assert_html_output and
# assert_text_output helpers.
# NOTE(review): presumably those helpers (and deobfuscate_mailto) come from
# GovspeakTestHelper, which is defined outside this file - confirm.
# NOTE(review): in this copy many expected-output literals contain no HTML
# tags at all (for example the sanitisation tests compare "" with ""), and
# several "#" comments are followed by code on the same line. This looks like
# markup and line breaks were lost before the file reached review - verify
# against the upstream source before relying on any expected value here.
require "test_helper" require "govspeak_test_helper" require "ostruct" class GovspeakTest < Minitest::Test include GovspeakTestHelper test "simple smoke-test" do rendered = Govspeak::Document.new("*this is markdown*").to_html assert_equal "

this is markdown

\n", rendered end test "simple smoke-test for simplified API" do rendered = Govspeak::Document.to_html("*this is markdown*") assert_equal "

this is markdown

\n", rendered end test "strips forbidden unicode characters" do rendered = Govspeak::Document.new( "this is text with forbidden characters \u0008\u000b\ufffe\u{2ffff}\u{5fffe}", ).to_html assert_equal "

this is text with forbidden characters

\n", rendered end test "highlight-answer block extension" do rendered = Govspeak::Document.new("this \n{::highlight-answer}Lead in to *BIG TEXT*\n{:/highlight-answer}").to_html assert_equal %(

this

\n\n
\n

Lead in to BIG TEXT

\n
\n), rendered end test "stat-headline block extension" do rendered = Govspeak::Document.new("this \n{stat-headline}*13.8bn* Age of the universe in years{/stat-headline}").to_html assert_equal %(

this

\n\n
\n

13.8bn Age of the universe in years

\n
\n), rendered end test "extracts headers with text, level and generated id" do document = Govspeak::Document.new %( # Big title ### Small subtitle ## Medium title ) assert_equal [ Govspeak::Header.new("Big title", 1, "big-title"), Govspeak::Header.new("Small subtitle", 3, "small-subtitle"), Govspeak::Header.new("Medium title", 2, "medium-title"), ], document.headers end test "extracts different ids for duplicate headers" do document = Govspeak::Document.new("## Duplicate header\n\n## Duplicate header") assert_equal [ Govspeak::Header.new("Duplicate header", 2, "duplicate-header"), Govspeak::Header.new("Duplicate header", 2, "duplicate-header-1"), ], document.headers end test "extracts headers when nested inside blocks" do document = Govspeak::Document.new %( # First title
## Nested subtitle
### Double nested subtitle
### Second double subtitle
) assert_equal [ Govspeak::Header.new("First title", 1, "first-title"), Govspeak::Header.new("Nested subtitle", 2, "nested-subtitle"), Govspeak::Header.new("Double nested subtitle", 3, "double-nested-subtitle"), Govspeak::Header.new("Second double subtitle", 3, "second-double-subtitle"), ], document.headers end test "extracts headers with explicitly specified ids" do document = Govspeak::Document.new %( # First title ## Second title {#special} ) assert_equal [ Govspeak::Header.new("First title", 1, "first-title"), Govspeak::Header.new("Second title", 2, "special"), ], document.headers end test "extracts text with no HTML and normalised spacing" do input = "# foo\n\nbar baz " doc = Govspeak::Document.new(input) assert_equal "foo bar baz", doc.to_text end test "trailing space after the address should not prevent parsing" do input = %($A 123 Test Street Testcase Cliffs Teston 0123 456 7890 $A ) doc = Govspeak::Document.new(input) assert_equal %(\n

\n123 Test Street
Testcase Cliffs
Teston
0123 456 7890 \n

\n), doc.to_html end test "should convert barchart" do input = <<~GOVSPEAK |col| |---| |val| {barchart} GOVSPEAK html = Govspeak::Document.new(input).to_html assert_equal %(\n \n \n \n \n \n \n \n \n \n \n
col
val
\n), html end test "should convert barchart with stacked compact and negative" do input = <<~GOVSPEAK |col| |---| |val| {barchart stacked compact negative} GOVSPEAK html = Govspeak::Document.new(input).to_html assert_equal %(\n \n \n \n \n \n \n \n \n \n \n
col
val
\n), html end test "address div is separated from paragraph text by a couple of line-breaks" do # else kramdown processes address div as part of paragraph text and escapes HTML input = %(Paragraph1 $A 123 Test Street Testcase Cliffs Teston 0123 456 7890 $A) doc = Govspeak::Document.new(input) assert_equal %(

Paragraph1

\n\n

\n123 Test Street
Testcase Cliffs
Teston
0123 456 7890 \n

\n), doc.to_html end test_given_govspeak("^ I am very informational ^") do assert_html_output %(

I am very informational

) assert_text_output "I am very informational" end test "processing an extension does not modify the provided input" do input = "^ I am very informational" Govspeak::Document.new(input).to_html assert_equal "^ I am very informational", input end test_given_govspeak "The following is very informational\n^ I am very informational ^" do assert_html_output %(

The following is very informational

I am very informational

) assert_text_output "The following is very informational I am very informational" end test_given_govspeak "^ I am very informational" do assert_html_output %(

I am very informational

) assert_text_output "I am very informational" end test_given_govspeak "@ I am very important @" do assert_html_output %(

I am very important

) assert_text_output "I am very important" end test_given_govspeak " The following is very important @ I am very important @ " do assert_html_output %(

The following is very important

I am very important

) assert_text_output "The following is very important I am very important" end test_given_govspeak "% I am very helpful %" do assert_html_output %(

I am very helpful

) assert_text_output "I am very helpful" end test_given_govspeak "The following is very helpful\n% I am very helpful %" do assert_html_output %(

The following is very helpful

I am very helpful

) assert_text_output "The following is very helpful I am very helpful" end test_given_govspeak "## Hello ##\n\n% I am very helpful %\r\n### Young Workers ###\n\n" do assert_html_output %(

Hello

I am very helpful

Young Workers

) assert_text_output "Hello I am very helpful Young Workers" end test_given_govspeak "% I am very helpful" do assert_html_output %(

I am very helpful

) assert_text_output "I am very helpful" end test_given_govspeak "This is a [link](http://www.gov.uk) isn't it?" do assert_html_output '

This is a link isn’t it?

' assert_text_output "This is a link isn’t it?" end test_given_govspeak "This is a [link with an at sign in it](http://www.gov.uk/@dg/@this) isn't it?" do assert_html_output '

This is a link with an at sign in it isn’t it?

' assert_text_output "This is a link with an at sign in it isn’t it?" end test_given_govspeak " HTML *[HTML]: Hyper Text Markup Language" do assert_html_output %(

HTML

) assert_text_output "HTML" end test_given_govspeak "x[a link](http://rubyforge.org)x" do assert_html_output '

a link

' assert_text_output "a link" end test_given_govspeak "x[an xx link](http://x.com)x" do assert_html_output '

an xx link

' end test_given_govspeak "[internal link](http://www.gov.uk)" do assert_html_output '

internal link

' end test_given_govspeak "[link with no host is assumed to be internal](/)" do assert_html_output '

link with no host is assumed to be internal

' end test_given_govspeak "[internal link with rel attribute keeps it](http://www.gov.uk){:rel='next'}" do assert_html_output '

' end test_given_govspeak "[external link without x markers](http://www.google.com)" do assert_html_output '

external link without x markers

' end # Based on Kramdown inline attribute list (IAL) test: # https://github.com/gettalong/kramdown/blob/627978525cf5ee5b290d8a1b8675aae9cc9e2934/test/testcases/span/01_link/link_defs_with_ial.text test_given_govspeak "External link definitions with [attr] and [attr 2] and [attr 3] and [attr before]\n\n[attr]: http://example.com 'title'\n{: hreflang=\"en\" .test}\n\n[attr 2]: http://example.com 'title'\n{: hreflang=\"en\"}\n{: .test}\n\n[attr 3]: http://example.com\n{: .test}\ntest\n\n{: hreflang=\"en\"}\n{: .test}\n[attr before]: http://example.com" do assert_html_output "

External link definitions with attr and attr 2 and attr 3 and attr before

\n\n

test

" end test_given_govspeak "External link with [inline attribute list] (IAL)\n\n[inline attribute list]: http://example.com 'title'\n{: hreflang=\"en\" .test}" do assert_html_output '

External link with inline attribute list (IAL)

' end test_given_govspeak "[external link with rel attribute](http://www.google.com){:rel='next'}" do assert_html_output '

' end test_given_govspeak "Text before [an external link](http://www.google.com)" do assert_html_output '

Text before an external link

' end test_given_govspeak "[An external link](http://www.google.com) with text afterwards" do assert_html_output '

An external link with text afterwards

' end test_given_govspeak "Text before [an external link](http://www.google.com) and text afterwards" do assert_html_output '

Text before an external link and text afterwards

' end test_given_govspeak "![image with external url](http://www.example.com/image.jpg)" do assert_html_output '

image with external url

' end test "should be able to override default 'document_domains' option" do html = Govspeak::Document.new("[internal link](http://www.not-external.com)", document_domains: %w[www.not-external.com]).to_html refute html.include?('rel="external"'), "should not consider www.not-external.com as an external url" end test "should be able to supply multiple domains for 'document_domains' option" do html = Govspeak::Document.new("[internal link](http://www.not-external-either.com)", document_domains: %w[www.not-external.com www.not-external-either.com]).to_html refute html.include?('rel="external"'), "should not consider www.not-external-either.com as an external url" end test "should be able to override default 'input' option" do html = Govspeak::Document.new("[external link](http://www.external.com)", input: "kramdown").to_html refute html.include?('rel="external"'), "should not automatically add rel external attribute" end test "should not be able to override default 'entity output' option" do html = Govspeak::Document.new(">", entity_output: :numeric).to_html assert html.include?(">") end test "should assume a link with an invalid uri is internal" do html = Govspeak::Document.new("[link](:invalid-uri)").to_html refute html.include?('rel="external"') end test "should treat a mailto as internal" do html = Govspeak::Document.new("[link](mailto:a@b.com)").to_html refute html.include?('rel="external"') assert_equal %(

link

\n), deobfuscate_mailto(html) end test "permits mailto:// URI" do html = Govspeak::Document.new("[link](mailto://a@b.com)").to_html assert_equal %(

link

\n), deobfuscate_mailto(html) end test "permits dud mailto: URI" do html = Govspeak::Document.new("[link](mailto:)").to_html assert_equal %(

link

\n), deobfuscate_mailto(html) end test "permits trailing whitespace in an URI" do Govspeak::Document.new("[link](http://example.com/%20)").to_html end # Regression test - the surrounded_by helper doesn't require the closing x # so 'xaa' was getting picked up by the external link helper above # TODO: review whether we should require closing symbols for these extensions # need to check all existing content. test_given_govspeak "xaa" do assert_html_output "

xaa

" assert_text_output "xaa" end test_given_govspeak " $! rainbow $!" do assert_html_output %(

rainbow

) assert_text_output "rainbow" end test_given_govspeak "$C help, send cake $C" do assert_html_output %(

help, send cake

) assert_text_output "help, send cake" end test_given_govspeak " $A street road $A" do assert_html_output %(

street
road

) assert_text_output "street road" end test_given_govspeak " $P $I help $I $P" do assert_html_output %(
\n\n
\n

help

\n
\n
) assert_text_output "help" end test_given_govspeak " $D can you tell me how to get to... $D" do assert_html_output %(

can you tell me how to get to…

) assert_text_output "can you tell me how to get to…" end test_given_govspeak " $CTA Click here to start the tool $CTA" do assert_html_output %(

Click here to start the tool

) assert_text_output "Click here to start the tool" end test_given_govspeak " Here is some text $CTA Click here to start the tool $CTA " do assert_html_output %(

Here is some text

Click here to start the tool

) end test_given_govspeak " $CTA This is a test: s1. This is number 1. s2. This is number 2. s3. This is number 3. s4. This is number 4. $CTA" do assert_html_output %(

This is a test:

  1. This is number 1.

  2. This is number 2.

  3. This is number 3.

  4. This is number 4.

) end test_given_govspeak " $CTA [external link](http://www.external.com) some text $CTA " do assert_html_output %(

external link some text

) end test_given_govspeak " $CTA [internal link](http://www.not-external.com) some text $CTA", document_domains: %w[www.not-external.com] do assert_html_output %(

internal link some text

) end test_given_govspeak " $CTA Click here to start the tool $CTA $C Here is some text $C " do assert_html_output %(

Click here to start the tool

Here is some text

) end test_given_govspeak " [internal link](http://www.not-external.com) $CTA Click here to start the tool $CTA", document_domains: %w[www.not-external.com] do assert_html_output %(

internal link

Click here to start the tool

) end test_given_govspeak " $CTA Click here to start the tool[^1] $CTA [^1]: Footnote definition one " do assert_html_output %(

Click here to start the tool[footnote 1]

  1. Footnote definition one

) end test_given_govspeak " $CTA Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce felis ante[^1], lobortis non quam sit amet, tempus interdum justo. $CTA $CTA Pellentesque quam enim, egestas sit amet congue sit amet[^2], ultrices vitae arcu. Fringilla, metus dui scelerisque est. $CTA [^1]: Footnote definition one [^2]: Footnote definition two " do assert_html_output %(

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce felis ante[footnote 1], lobortis non quam sit amet, tempus interdum justo.

Pellentesque quam enim, egestas sit amet congue sit amet[footnote 2], ultrices vitae arcu. Fringilla, metus dui scelerisque est.

  1. Footnote definition one

  2. Footnote definition two

) end test_given_govspeak " $CTA Click here to start the tool[^1] $CTA Lorem ipsum dolor sit amet[^2] [^1]: Footnote definition 1 [^2]: Footnote definition 2 " do assert_html_output %(

Click here to start the tool[footnote 1]

Lorem ipsum dolor sit amet[footnote 2]

  1. Footnote definition 1

  2. Footnote definition 2

) end test_given_govspeak " 1. rod 2. jane 3. freddy" do assert_html_output "
    \n
  1. rod
  2. \n
  3. jane
  4. \n
  5. freddy
  6. \n
" assert_text_output "rod jane freddy" end test_given_govspeak " s1. zippy s2. bungle s3. george " do assert_html_output %(
  1. zippy

  2. bungle

  3. george

) assert_text_output "zippy bungle george" end test_given_govspeak " - unordered - list s1. step s2. list " do assert_html_output %(
  1. step

  2. list

) assert_text_output "unordered list step list" end test_given_govspeak " $LegislativeList * 1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce felis ante, lobortis non quam sit amet, tempus interdum justo. Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu. fringilla, metus dui scelerisque est. * a) A list item * b) Another list item * 1.1 Second entry Curabitur pretium pharetra sapien, a feugiat arcu euismod eget. Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus $EndLegislativeList " do assert_html_output %{
  1. 1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.

    Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu. fringilla, metus dui scelerisque est.

    1. a) A list item

    2. b) Another list item

  2. 1.1 Second entry Curabitur pretium pharetra sapien, a feugiat arcu euismod eget. Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus

} end test_given_govspeak " $LegislativeList * 1. The quick * 2. Brown fox * a) Jumps over * b) The lazy * 3. Dog $EndLegislativeList " do assert_html_output %{
  1. 1. The quick
  2. 2. Brown fox
    1. a) Jumps over
    2. b) The lazy
  3. 3. Dog
} end test_given_govspeak " $LegislativeList * 1. Item 1[^1] * 2. Item 2[^2] * 3. Item 3 $EndLegislativeList [^1]: Footnote definition one [^2]: Footnote definition two " do assert_html_output %(
  1. 1. Item 1[footnote 1]
  2. 2. Item 2[footnote 2]
  3. 3. Item 3
  1. Footnote definition one

  2. Footnote definition two

) end test_given_govspeak " $LegislativeList * 1. Item 1[^1] * 2. Item 2 * 3. Item 3 $EndLegislativeList This is a paragraph with a footnote[^2]. $LegislativeList * 1. Item 1 * 2. Item 2[^3] * 3. Item 3 $EndLegislativeList [^1]: Footnote definition one [^2]: Footnote definition two [^3]: Footnote definition two " do assert_html_output %(
  1. 1. Item 1[footnote 1]
  2. 2. Item 2
  3. 3. Item 3

This is a paragraph with a footnote[footnote 2].

  1. 1. Item 1
  2. 2. Item 2[footnote 3]
  3. 3. Item 3
  1. Footnote definition one

  2. Footnote definition two

  3. Footnote definition two

) end test_given_govspeak " $LegislativeList * 1. Item 1[^1] * 2. Item 2[^2] * 3. Item 3[^3] $EndLegislativeList This is a paragraph with a footnote[^4]. $LegislativeList * 1. Item 1[^5] * 2. Item 2[^6] * 3. Item 3[^7] $EndLegislativeList This is a paragraph with a footnote[^8]. $LegislativeList * 1. Item 1[^9] * 2. Item 2[^10] * 3. Item 3[^11] $EndLegislativeList This is a paragraph with a footnote[^12]. [^1]: Footnote definition 1 [^2]: Footnote definition 2 [^3]: Footnote definition 3 [^4]: Footnote definition 4 [^5]: Footnote definition 5 [^6]: Footnote definition 6 [^7]: Footnote definition 7 [^8]: Footnote definition 8 [^9]: Footnote definition 9 [^10]: Footnote definition 10 [^11]: Footnote definition 11 [^12]: Footnote definition 12 " do assert_html_output %(
  1. 1. Item 1[footnote 1]
  2. 2. Item 2[footnote 2]
  3. 3. Item 3[footnote 3]

This is a paragraph with a footnote[footnote 4].

  1. 1. Item 1[footnote 5]
  2. 2. Item 2[footnote 6]
  3. 3. Item 3[footnote 7]

This is a paragraph with a footnote[footnote 8].

  1. 1. Item 1[footnote 9]
  2. 2. Item 2[footnote 10]
  3. 3. Item 3[footnote 11]

This is a paragraph with a footnote[footnote 12].

  1. Footnote definition 1

  2. Footnote definition 2

  3. Footnote definition 3

  4. Footnote definition 4

  5. Footnote definition 5

  6. Footnote definition 6

  7. Footnote definition 7

  8. Footnote definition 8

  9. Footnote definition 9

  10. Footnote definition 10

  11. Footnote definition 11

  12. Footnote definition 12

) end test_given_govspeak " $LegislativeList * 1. Item 1[^1] with a [link](http://www.gov.uk) * 2. Item 2 * 3. Item 3 $EndLegislativeList This is a paragraph with a footnote[^2] [^1]: Footnote definition one [^2]: Footnote definition two " do assert_html_output %(
  1. 1. Item 1[footnote 1] with a link
  2. 2. Item 2
  3. 3. Item 3

This is a paragraph with a footnote[footnote 2]

  1. Footnote definition one

  2. Footnote definition two

) end test_given_govspeak " $LegislativeList * 1. Item 1[^1] with a [link](http://www.gov.uk) * 2. Item 2 * 3. Item 3[^2] $EndLegislativeList [^1]: Footnote definition one with a [link](http://www.gov.uk) included [^2]: Footnote definition two with an external [link](http://www.google.com) " do assert_html_output %(
  1. 1. Item 1[footnote 1] with a link
  2. 2. Item 2
  3. 3. Item 3[footnote 2]
  1. Footnote definition one with a link included

  2. Footnote definition two with an external link

) end test_given_govspeak " $LegislativeList 1. some text[^1]: $EndLegislativeList [^1]: footnote text " do assert_html_output %(

1. some text[footnote 1]:

  1. footnote text

) end test_given_govspeak " $LegislativeList 1. some text[^1]: extra $EndLegislativeList [^1]: footnote text " do assert_html_output %(

1. some text[footnote 1]: extra

  1. footnote text

) end # FIXME: this code is buggy and replaces abbreviations in HTML tags - removing the functionality for now # test_given_govspeak " # $LegislativeList # * 1. Item 1[^1] with an ACRONYM # * 2. Item 2[^2] # * 3. Item 3 # $EndLegislativeList # # [^1]: Footnote definition one # [^2]: Footnote definition two with an ACRONYM # # *[ACRONYM]: This is the acronym explanation # " do # assert_html_output %( #
    #
  1. 1. Item 1[footnote 1] with an ACRONYM #
  2. #
  3. 2. Item 2[footnote 2] #
  4. #
  5. 3. Item 3
  6. #
# #
#
    #
  1. #

    # Footnote definition one #

    #
  2. #
  3. #

    # Footnote definition two with an ACRONYM #

    #
  4. #
#
# ) # end test_given_govspeak " The quick brown $LegislativeList * 1. fox jumps over " do assert_html_output "

The quick brown $LegislativeList * 1. fox jumps over

" end test_given_govspeak " The quick brown fox $LegislativeList * 1. jumps over the lazy dog $EndLegislativeList " do assert_html_output %(

The quick brown fox

  1. 1. jumps over the lazy dog
) end test_given_govspeak "This bit of text\r\n\r\n$LegislativeList\r\n* 1. should be turned into a list\r\n$EndLegislativeList" do assert_html_output %(

This bit of text

  1. 1. should be turned into a list
) end test_given_govspeak " Zippy, Bungle and George did not qualify for the tax exemption in s428. They filled in their tax return accordingly. " do assert_html_output %(

Zippy, Bungle and George did not qualify for the tax exemption in s428. They filled in their tax return accordingly.

) end test_given_govspeak ":scotland: I am very devolved\n and very scottish \n:scotland:" do assert_html_output '

This section applies to Scotland

I am very devolved and very scottish

' end test_given_govspeak "@ Message with [a link](http://foo.bar/)@" do assert_html_output %(

Message with a link

) end test "sanitize source input by default" do document = Govspeak::Document.new("") assert_equal "", document.to_html.strip end test "it can have sanitizing disabled" do document = Govspeak::Document.new("", sanitize: false) assert_equal "", document.to_html.strip end test "it can exclude stipulated elements from sanitization" do document = Govspeak::Document.new("some content", allowed_elements: %w[uncommon-element]) assert_equal "some content", document.to_html.strip end test "identifies a Govspeak document containing malicious HTML as invalid" do document = Govspeak::Document.new("") refute document.valid? end test "identifies a Govspeak document containing acceptable HTML as valid" do document = Govspeak::Document.new("
some content
") assert document.valid? end expected_priority_list_output = %( ) test "Single priority list ending with EOF" do govspeak = "$PriorityList:3 * List item 1 * List item 2 * List item 3 * List item 4 * List item 5" given_govspeak(govspeak) do assert_html_output(expected_priority_list_output) end end test "Single priority list ending with newlines" do govspeak = "$PriorityList:3 * List item 1 * List item 2 * List item 3 * List item 4 * List item 5 " given_govspeak(govspeak) do assert_html_output(expected_priority_list_output) end end test 'Single priority list with \n newlines' do govspeak = "$PriorityList:3\n * List item 1\n * List item 2\n * List item 3\n * List item 4\n * List item 5" given_govspeak(govspeak) do assert_html_output(expected_priority_list_output) end end test 'Single priority list with \r\n newlines' do govspeak = "$PriorityList:3\r\n * List item 1\r\n * List item 2\r\n * List item 3\r\n * List item 4\r\n * List item 5" given_govspeak(govspeak) do assert_html_output(expected_priority_list_output) end end test "Multiple priority lists" do govspeak = " $PriorityList:3 * List item 1 * List item 2 * List item 3 * List item 4 * List item 5 $PriorityList:1 * List item 1 * List item 2" given_govspeak(govspeak) do assert_html_output %( ) end end test "Priority list placed incorrectly" do govspeak = " This is a paragraph $PriorityList:3 * List item 1 * List item 2 * List item 3 * List item 4 * List item 5" given_govspeak(govspeak) do assert_html_output("

This is a paragraph $PriorityList:3 * List item 1 * List item 2 * List item 3 * List item 4 * List item 5

") end end test "Priority list placed correctly" do govspeak = " This is a paragraph $PriorityList:3 * List item 1 * List item 2 * List item 3 * List item 4 * List item 5" given_govspeak(govspeak) do assert_html_output %(

This is a paragraph

) end end test "should remove quotes surrounding a blockquote" do govspeak = %( He said: > "I'm not sure what you mean!" Or so we thought.) given_govspeak(govspeak) do assert_html_output %(

He said:

I’m not sure what you mean!

Or so we thought.

) end end test "should add class to last paragraph of blockquote" do govspeak = " > first line > > last line" given_govspeak(govspeak) do assert_html_output %(

first line

last line

) end end end