# encoding: UTF-8

require 'test_helper'
require 'govspeak_test_helper'
require 'ostruct'

# Tests for Govspeak::Document: smoke tests, block extensions, header
# extraction, address blocks, links, attached images, sanitisation and
# priority lists.
#
# NOTE(review): many expected-output literals below contain no markup, and
# several inputs that look like they should span multiple lines sit on a
# single line. This looks like the fixtures lost their HTML tags and line
# breaks in an earlier conversion -- confirm against version control before
# trusting these assertions. The literals are deliberately left untouched
# here; only the statement layout has been restored.
#
# Continuation lines of multi-line string literals are intentionally not
# indented, because any leading whitespace would become part of the string.
class GovspeakTest < Minitest::Test
  include GovspeakTestHelper

  # --- Basic rendering and block extensions ---------------------------------

  test "simple smoke-test" do
    rendered = Govspeak::Document.new("*this is markdown*").to_html
    assert_equal "

this is markdown

\n", rendered
  end

  test "simple smoke-test for simplified API" do
    rendered = Govspeak::Document.to_html("*this is markdown*")
    assert_equal "

this is markdown

\n", rendered
  end

  test "simple block extension" do
    rendered = Govspeak::Document.new("this \n{::reverse}\n*is*\n{:/reverse}\n markdown").to_html
    assert_equal "

this

\n\n

si

\n\n

markdown

\n", rendered
  end

  test "highlight-answer block extension" do
    rendered = Govspeak::Document.new("this \n{::highlight-answer}Lead in to *BIG TEXT*\n{:/highlight-answer}").to_html
    assert_equal %Q{

this

\n\n
\n

Lead in to BIG TEXT

\n
\n}, rendered
  end

  test "stat-headline block extension" do
    rendered = Govspeak::Document.new("this \n{stat-headline}*13.8bn* Age of the universe in years{/stat-headline}").to_html
    assert_equal %Q{

this

\n\n\n}, rendered
  end

  # --- Header extraction ----------------------------------------------------

  test "extracts headers with text, level and generated id" do
    document = Govspeak::Document.new %{ # Big title ### Small subtitle ## Medium title }
    assert_equal [
      Govspeak::Header.new('Big title', 1, 'big-title'),
      Govspeak::Header.new('Small subtitle', 3, 'small-subtitle'),
      Govspeak::Header.new('Medium title', 2, 'medium-title')
    ], document.headers
  end

  test "extracts different ids for duplicate headers" do
    document = Govspeak::Document.new("## Duplicate header\n\n## Duplicate header")
    assert_equal [
      Govspeak::Header.new('Duplicate header', 2, 'duplicate-header'),
      Govspeak::Header.new('Duplicate header', 2, 'duplicate-header-1')
    ], document.headers
  end

  test "extracts headers when nested inside blocks" do
    document = Govspeak::Document.new %{ # First title
## Nested subtitle
### Double nested subtitle
### Second double subtitle
}
    assert_equal [
      Govspeak::Header.new('First title', 1, 'first-title'),
      Govspeak::Header.new('Nested subtitle', 2, 'nested-subtitle'),
      Govspeak::Header.new('Double nested subtitle', 3, 'double-nested-subtitle'),
      Govspeak::Header.new('Second double subtitle', 3, 'second-double-subtitle')
    ], document.headers
  end

  test "extracts headers with explicitly specified ids" do
    document = Govspeak::Document.new %{ # First title ## Second title {#special} }
    assert_equal [
      Govspeak::Header.new('First title', 1, 'first-title'),
      Govspeak::Header.new('Second title', 2, 'special'),
    ], document.headers
  end

  test "extracts text with no HTML and normalised spacing" do
    input = "# foo\n\nbar baz "
    doc = Govspeak::Document.new(input)
    assert_equal "foo bar baz", doc.to_text
  end

  # --- Address blocks -------------------------------------------------------

  test "trailing space after the address should not prevent parsing" do
    input = %{$A 123 Test Street Testcase Cliffs Teston 0123 456 7890 $A }
    doc = Govspeak::Document.new(input)
    assert_equal %{\n

\n123 Test Street
Testcase Cliffs
Teston
0123 456 7890 \n

\n}, doc.to_html
  end

  test "address div is separated from paragraph text by a couple of line-breaks" do
    # else kramdown processes address div as part of paragraph text and escapes HTML
    input = %{Paragraph1 $A 123 Test Street Testcase Cliffs Teston 0123 456 7890 $A}
    doc = Govspeak::Document.new(input)
    assert_equal %{

Paragraph1

\n\n

\n123 Test Street
Testcase Cliffs
Teston
0123 456 7890 \n

\n}, doc.to_html
  end

  # --- Informational (^), important (@) and helpful (%) markers -------------

  test_given_govspeak("^ I am very informational ^") do
    assert_html_output %{

I am very informational

}
    assert_text_output "I am very informational"
  end

  test "processing an extension does not modify the provided input" do
    input = "^ I am very informational"
    Govspeak::Document.new(input).to_html
    assert_equal "^ I am very informational", input
  end

  test_given_govspeak "The following is very informational\n^ I am very informational ^" do
    assert_html_output %{

The following is very informational

I am very informational

}
    assert_text_output "The following is very informational I am very informational"
  end

  test_given_govspeak "^ I am very informational" do
    assert_html_output %{

I am very informational

}
    assert_text_output "I am very informational"
  end

  test_given_govspeak "@ I am very important @" do
    assert_html_output %{

I am very important

}
    assert_text_output "I am very important"
  end

  test_given_govspeak " The following is very important @ I am very important @ " do
    assert_html_output %{

The following is very important

I am very important

}
    assert_text_output "The following is very important I am very important"
  end

  test_given_govspeak "% I am very helpful %" do
    assert_html_output %{

I am very helpful

}
    assert_text_output "I am very helpful"
  end

  test_given_govspeak "The following is very helpful\n% I am very helpful %" do
    assert_html_output %{

The following is very helpful

I am very helpful

}
    assert_text_output "The following is very helpful I am very helpful"
  end

  test_given_govspeak "## Hello ##\n\n% I am very helpful %\r\n### Young Workers ###\n\n" do
    assert_html_output %{

Hello

I am very helpful

Young Workers

}
    assert_text_output "Hello I am very helpful Young Workers"
  end

  test_given_govspeak "% I am very helpful" do
    assert_html_output %{

I am very helpful

}
    assert_text_output "I am very helpful"
  end

  # --- Links ----------------------------------------------------------------

  test_given_govspeak "This is a [link](http://www.gov.uk) isn't it?" do
    assert_html_output '

This is a link isn’t it?

'
    assert_text_output "This is a link isn’t it?"
  end

  test_given_govspeak "This is a [link with an at sign in it](http://www.gov.uk/@dg/@this) isn't it?" do
    assert_html_output '

This is a link with an at sign in it isn’t it?

'
    assert_text_output "This is a link with an at sign in it isn’t it?"
  end

  test_given_govspeak " HTML *[HTML]: Hyper Text Markup Language" do
    assert_html_output %{

HTML

}
    assert_text_output "HTML"
  end

  test_given_govspeak "x[a link](http://rubyforge.org)x" do
    assert_html_output '

a link

'
    assert_text_output "a link"
  end

  test_given_govspeak "x[an xx link](http://x.com)x" do
    assert_html_output '

an xx link

'
  end

  test_given_govspeak "[internal link](http://www.gov.uk)" do
    assert_html_output '

internal link

'
  end

  test_given_govspeak "[link with no host is assumed to be internal](/)" do
    assert_html_output '

link with no host is assumed to be internal

'
  end

  test_given_govspeak "[internal link with rel attribute keeps it](http://www.gov.uk){:rel='next'}" do
    assert_html_output '

'
  end

  test_given_govspeak "[external link without x markers](http://www.google.com)" do
    assert_html_output '

external link without x markers

'
  end

  test_given_govspeak "[external link with rel attribute](http://www.google.com){:rel='next'}" do
    assert_html_output '

'
  end

  test_given_govspeak "Text before [an external link](http://www.google.com)" do
    assert_html_output '

Text before an external link

'
  end

  test_given_govspeak "[An external link](http://www.google.com) with text afterwards" do
    assert_html_output '

An external link with text afterwards

'
  end

  test_given_govspeak "Text before [an external link](http://www.google.com) and text afterwards" do
    assert_html_output '

Text before an external link and text afterwards

'
  end

  test_given_govspeak "![image with external url](http://www.example.com/image.jpg)" do
    assert_html_output '

image with external url

'
  end

  # --- Document options -----------------------------------------------------

  test "should be able to override default 'document_domains' option" do
    html = Govspeak::Document.new("[internal link](http://www.not-external.com)", document_domains: %w(www.not-external.com)).to_html
    refute html.include?('rel="external"'), "should not consider www.not-external.com as an external url"
  end

  test "should be able to supply multiple domains for 'document_domains' option" do
    html = Govspeak::Document.new("[internal link](http://www.not-external-either.com)", document_domains: %w(www.not-external.com www.not-external-either.com)).to_html
    refute html.include?('rel="external"'), "should not consider www.not-external-either.com as an external url"
  end

  test "should be able to override default 'input' option" do
    html = Govspeak::Document.new("[external link](http://www.external.com)", input: "kramdown").to_html
    refute html.include?('rel="external"'), "should not automatically add rel external attribute"
  end

  test "should be able to override default 'entity output' option" do
    # NOTE(review): input and expected value are the same literal character,
    # which does not obviously exercise :numeric output -- confirm the
    # intended fixtures.
    html = Govspeak::Document.new("¥", entity_output: :numeric).to_html
    assert html.include?("¥")
  end

  test "should assume a link with an invalid uri is internal" do
    html = Govspeak::Document.new("[link](:invalid-uri)").to_html
    refute html.include?('rel="external"')
  end

  # --- mailto links ---------------------------------------------------------

  test "should treat a mailto as internal" do
    html = Govspeak::Document.new("[link](mailto:a@b.com)").to_html
    refute html.include?('rel="external"')
    assert_equal %Q{

link

\n}, deobfuscate_mailto(html)
  end

  test "permits mailto:// URI" do
    html = Govspeak::Document.new("[link](mailto://a@b.com)").to_html
    assert_equal %Q{

link

\n}, deobfuscate_mailto(html)
  end

  test "permits dud mailto: URI" do
    html = Govspeak::Document.new("[link](mailto:)").to_html
    assert_equal %Q{

link

\n}, deobfuscate_mailto(html)
  end

  test "permits trailing whitespace in an URI" do
    # No assertion here: the test only requires that rendering completes.
    Govspeak::Document.new("[link](http://example.com/%20)").to_html
  end

  # Regression test - the surrounded_by helper doesn't require the closing x
  # so 'xaa' was getting picked up by the external link helper above
  # TODO: review whether we should require closing symbols for these extensions
  # need to check all existing content.
  test_given_govspeak "xaa" do
    assert_html_output '

xaa

'
    assert_text_output "xaa"
  end

  # --- $-prefixed extensions ------------------------------------------------

  test_given_govspeak " $! rainbow $!" do
    assert_html_output %{

rainbow

}
    assert_text_output "rainbow"
  end

  test_given_govspeak "$C help, send cake $C" do
    assert_html_output %{

help, send cake

}
    assert_text_output "help, send cake"
  end

  test_given_govspeak " $A street road $A" do
    assert_html_output %{

street
road

}
    assert_text_output "street road"
  end

  test_given_govspeak " $P $I help $I $P" do
    assert_html_output %{
\n\n
\n

help

\n
\n
}
    assert_text_output "help"
  end

  test_given_govspeak " $D can you tell me how to get to... $D" do
    assert_html_output %{

can you tell me how to get to…

}
    assert_text_output "can you tell me how to get to…"
  end

  test_given_govspeak " $CTA Click here to start the tool $CTA" do
    assert_html_output %{

Click here to start the tool

}
    assert_text_output "Click here to start the tool"
  end

  test_given_govspeak "Here is some text $CTA Click here to start the tool $CTA " do
    assert_html_output %{

Here is some text

Click here to start the tool

}
  end

  test_given_govspeak " [internal link](http://www.not-external.com) $CTA Click here to start the tool $CTA", [], document_domains: %w(www.not-external.com) do
    assert_html_output %{

internal link

Click here to start the tool

}
  end

  # --- Lists ----------------------------------------------------------------

  test_given_govspeak " 1. rod 2. jane 3. freddy" do
    assert_html_output "
    \n
  1. rod
  2. \n
  3. jane
  4. \n
  5. freddy
  6. \n
"
    assert_text_output "rod jane freddy"
  end

  test_given_govspeak " s1. zippy s2. bungle s3. george " do
    assert_html_output %{
  1. zippy

  2. bungle

  3. george

}
    assert_text_output "zippy bungle george"
  end

  test_given_govspeak " - unordered - list s1. step s2. list " do
    assert_html_output %{
  1. step

  2. list

}
    assert_text_output "unordered list step list"
  end

  test_given_govspeak " $LegislativeList * 1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce felis ante, lobortis non quam sit amet, tempus interdum justo. Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu. fringilla, metus dui scelerisque est. * a) A list item * b) Another list item * 1.1 Second entry Curabitur pretium pharetra sapien, a feugiat arcu euismod eget. Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus $EndLegislativeList " do
    assert_html_output %{
  1. 1.0 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Fusce felis ante, lobortis non quam sit amet, tempus interdum justo.

    Pellentesque quam enim, egestas sit amet congue sit amet, ultrices vitae arcu. fringilla, metus dui scelerisque est.

    1. a) A list item

    2. b) Another list item

  2. 1.1 Second entry Curabitur pretium pharetra sapien, a feugiat arcu euismod eget. Nunc luctus ornare varius. Nulla scelerisque, justo dictum dapibus

}
  end

  test_given_govspeak " $LegislativeList * 1. The quick * 2. Brown fox * a) Jumps over * b) The lazy * 3. Dog $EndLegislativeList " do
    assert_html_output %{
  1. 1. The quick
  2. 2. Brown fox
    1. a) Jumps over
    2. b) The lazy
  3. 3. Dog
}
  end

  test_given_govspeak " The quick brown $LegislativeList * 1. fox jumps over " do
    assert_html_output "

The quick brown $LegislativeList * 1. fox jumps over

"
  end

  test_given_govspeak " The quick brown fox $LegislativeList * 1. jumps over the lazy dog $EndLegislativeList " do
    assert_html_output %{

The quick brown fox

  1. 1. jumps over the lazy dog
}
  end

  test_given_govspeak "This bit of text\r\n\r\n$LegislativeList\r\n* 1. should be turned into a list\r\n$EndLegislativeList" do
    assert_html_output %{

This bit of text

  1. 1. should be turned into a list
}
  end

  test_given_govspeak " Zippy, Bungle and George did not qualify for the tax exemption in s428. They filled in their tax return accordingly. " do
    assert_html_output %{

Zippy, Bungle and George did not qualify for the tax exemption in s428. They filled in their tax return accordingly.

}
  end

  test_given_govspeak ":scotland: I am very devolved\n and very scottish \n:scotland:" do
    assert_html_output '

This section applies to Scotland

I am very devolved and very scottish

'
  end

  test_given_govspeak "@ Message with [a link](http://foo.bar/)@" do
    assert_html_output %{

Message with a link

}
  end

  # --- Attached images (!!n) ------------------------------------------------

  test "can reference attached images using !!n" do
    images = [OpenStruct.new(alt_text: 'my alt', url: "http://example.com/image.jpg")]
    given_govspeak "!!1", images do
      assert_html_output %Q{
my alt
}
    end
  end

  test "alt text of referenced images is escaped" do
    images = [OpenStruct.new(alt_text: %Q{my alt '&"<>}, url: "http://example.com/image.jpg")]
    given_govspeak "!!1", images do
      assert_html_output %Q{
my alt '&"<>
}
    end
  end

  test "silently ignores an image attachment if the referenced image is missing" do
    doc = Govspeak::Document.new("!!1")
    doc.images = []
    assert_equal %Q{\n}, doc.to_html
  end

  test "adds image caption if given" do
    images = [OpenStruct.new(alt_text: "my alt", url: "http://example.com/image.jpg", caption: 'My Caption & so on')]
    given_govspeak "!!1", images do
      assert_html_output %Q{
my alt
My Caption & so on
}
    end
  end

  test "ignores a blank caption" do
    images = [OpenStruct.new(alt_text: "my alt", url: "http://example.com/image.jpg", caption: ' ')]
    given_govspeak "!!1", images do
      assert_html_output %Q{
my alt
}
    end
  end

  # --- Sanitisation and validity --------------------------------------------

  test "can sanitize a document" do
    document = Govspeak::Document.new("")
    assert_equal "doBadThings();", document.to_sanitized_html.strip
  end

  test "can sanitize a document without image" do
    document = Govspeak::Document.new("")
    assert_equal "doBadThings();

", document.to_sanitized_html_without_images.gsub(/\s/, "")
  end

  test "identifies a Govspeak document containing malicious HTML as invalid" do
    document = Govspeak::Document.new("")
    refute document.valid?
  end

  test "identifies a Govspeak document containing acceptable HTML as valid" do
    document = Govspeak::Document.new("
some content
")
    assert document.valid?
  end

  # --- Priority lists -------------------------------------------------------

  # Shared expected output for the single-priority-list tests below; the
  # test blocks capture this class-body local as closures.
  expected_priority_list_output = %| |

  test "Single priority list ending with EOF" do
    govspeak = "$PriorityList:3 * List item 1 * List item 2 * List item 3 * List item 4 * List item 5"
    given_govspeak(govspeak) do
      assert_html_output(expected_priority_list_output)
    end
  end

  test "Single priority list ending with newlines" do
    govspeak = "$PriorityList:3 * List item 1 * List item 2 * List item 3 * List item 4 * List item 5 "
    given_govspeak(govspeak) do
      assert_html_output(expected_priority_list_output)
    end
  end

  test 'Single priority list with \n newlines' do
    govspeak = "$PriorityList:3\n * List item 1\n * List item 2\n * List item 3\n * List item 4\n * List item 5"
    given_govspeak(govspeak) do
      assert_html_output(expected_priority_list_output)
    end
  end

  test 'Single priority list with \r\n newlines' do
    govspeak = "$PriorityList:3\r\n * List item 1\r\n * List item 2\r\n * List item 3\r\n * List item 4\r\n * List item 5"
    given_govspeak(govspeak) do
      assert_html_output(expected_priority_list_output)
    end
  end

  test "Multiple priority lists" do
    govspeak = " $PriorityList:3 * List item 1 * List item 2 * List item 3 * List item 4 * List item 5 $PriorityList:1 * List item 1 * List item 2"
    given_govspeak(govspeak) do
      assert_html_output %| |
    end
  end

  test "Priority list placed incorrectly" do
    govspeak = " This is a paragraph $PriorityList:3 * List item 1 * List item 2 * List item 3 * List item 4 * List item 5"
    given_govspeak(govspeak) do
      assert_html_output("

This is a paragraph $PriorityList:3 * List item 1 * List item 2 * List item 3 * List item 4 * List item 5

")
    end
  end

  test "Priority list placed correctly" do
    govspeak = " This is a paragraph $PriorityList:3 * List item 1 * List item 2 * List item 3 * List item 4 * List item 5"
    given_govspeak(govspeak) do
      assert_html_output %|

This is a paragraph

|
    end
  end
end