require 'active_support/core_ext/hash'
require 'active_support/core_ext/array'
require 'erb'
require 'htmlentities'
require 'kramdown'
require 'kramdown/parser/kramdown_with_automatic_external_links'
require 'govspeak/header_extractor'
require 'govspeak/structured_header_extractor'
require 'govspeak/html_validator'
require 'govspeak/html_sanitizer'
require 'govspeak/kramdown_overrides'
require 'govspeak/blockquote_extra_quote_remover'
require 'govspeak/post_processor'
require 'govspeak/presenters/attachment_presenter'
require 'govspeak/presenters/contact_presenter'
require 'govspeak/presenters/h_card_presenter'
module Govspeak
class Document
Parser = Kramdown::Parser::KramdownWithAutomaticExternalLinks
PARSER_CLASS_NAME = Parser.name.split("::").last
@@extensions = []
attr_accessor :images
attr_reader :attachments, :contacts, :links, :locale
def self.to_html(source, options = {})
new(source, options).to_html
end
def initialize(source, options = {})
options = options.dup.deep_symbolize_keys
@source = source ? source.dup : ""
@images = options.delete(:images) || []
@attachments = Array.wrap(options.delete(:attachments))
@links = Array.wrap(options.delete(:links))
@contacts = Array.wrap(options.delete(:contacts))
@locale = options.fetch(:locale, "en")
@options = {input: PARSER_CLASS_NAME}.merge(options)
@options[:entity_output] = :symbolic
i18n_load_paths
end
def i18n_load_paths
Dir.glob('locales/*.yml') do |f|
I18n.load_path << f
end
end
private :i18n_load_paths
def kramdown_doc
@kramdown_doc ||= Kramdown::Document.new(preprocess(@source), @options)
end
private :kramdown_doc
def to_html
@html ||= Govspeak::PostProcessor.process(kramdown_doc.to_html)
end
def to_liquid
to_html
end
def t(*args)
options = args.last.is_a?(Hash) ? args.last.dup : {}
key = args.shift
I18n.t(key, options.merge(locale: locale))
end
def to_sanitized_html
HtmlSanitizer.new(to_html).sanitize
end
def to_sanitized_html_without_images
HtmlSanitizer.new(to_html).sanitize_without_images
end
def to_text
HTMLEntities.new.decode(to_html.gsub(/(?:<[^>]+>|\s)+/, " ").strip)
end
def valid?(validation_options = {})
Govspeak::HtmlValidator.new(@source, validation_options).valid?
end
def headers
Govspeak::HeaderExtractor.convert(kramdown_doc).first
end
def structured_headers
Govspeak::StructuredHeaderExtractor.new(self).call
end
def preprocess(source)
source = Govspeak::BlockquoteExtraQuoteRemover.remove(source)
@@extensions.each do |title,regexp,block|
source.gsub!(regexp) {
instance_exec(*Regexp.last_match.captures, &block)
}
end
source
end
def encode(text)
HTMLEntities.new.encode(text)
end
private :encode
def self.extension(title, regexp = nil, &block)
regexp ||= %r${::#{title}}(.*?){:/#{title}}$m
@@extensions << [title, regexp, block]
end
def self.surrounded_by(open, close=nil)
open = Regexp::escape(open)
if close
close = Regexp::escape(close)
%r+(?:\r|\n|^)#{open}(.*?)#{close} *(\r|\n|$)?+m
else
%r+(?:\r|\n|^)#{open}(.*?)#{open}? *(\r|\n|$)+m
end
end
def self.wrap_with_div(class_name, character, parser=Kramdown::Document)
extension(class_name, surrounded_by(character)) { |body|
content = parser ? parser.new("#{body.strip}\n").to_html : body.strip
%{\n
\n#{content}
\n}
}
end
def insert_strong_inside_p(body, parser=Govspeak::Document)
parser.new(body.strip).to_html.sub(/^
(.*)<\/p>$/,"
\\1
")
end
extension('button', %r{
^ # Match start of line only, allows for indenting code examples
{button(.*?)} # match opening bracket and capture attributes
\s* # any whitespace between opening bracket and link
\[ # match start of link markdown
([^\]]+) # capture inside of link markdown
\] # match end of link markdown
\( # match start of link text markdown
([^)]+) # capture inside of link text markdown
\) # match end of link text markdown
\s* # any whitespace between opening bracket and link
{\/button} # match ending bracket
$ # Match end of line only, allows for indenting code examples
}x) { |attributes, text, href|
button_classes = "button"
button_classes << " button-start" if attributes =~ /start/
/cross-domain-tracking:(?.[^\s*]+)/ =~ attributes
data_attribute = ""
if cross_domain_tracking
data_attribute << " data-module='cross-domain-tracking'"
data_attribute << " data-tracking-code='#{cross_domain_tracking.strip}'"
data_attribute << " data-tracking-name='govspeakButtonTracker'"
end
text = text.strip
href = href.strip
%{\n#{text}\n}
}
extension('highlight-answer') { |body|
%{\n\n
\n}
}
extension('barchart', /{barchart(.*?)}/) do |captures, body|
stacked = '.mc-stacked' if captures.include? 'stacked'
compact = '.compact' if captures.include? 'compact'
negative = '.mc-negative' if captures.include? 'negative'
[
'{:',
'.js-barchart-table',
stacked,
compact,
negative,
'.mc-auto-outdent',
'}'
].join(' ')
end
extension('attached-image', /^!!([0-9]+)/) do |image_number|
image = images[image_number.to_i - 1]
if image
caption = image.caption rescue nil
render_image(image.url, image.alt_text, caption)
else
""
end
end
extension('attachment', /\[embed:attachments:(?!inline:|image:)\s*(.*?)\s*\]/) do |content_id, body|
attachment = attachments.detect { |a| a[:content_id] == content_id }
next "" unless attachment
attachment = AttachmentPresenter.new(attachment)
content = File.read(__dir__ + '/templates/attachment.html.erb')
ERB.new(content).result(binding)
end
extension('attachment inline', /\[embed:attachments:inline:\s*(.*?)\s*\]/) do |content_id|
attachment = attachments.detect { |a| a[:content_id] == content_id }
next "" unless attachment
attachment = AttachmentPresenter.new(attachment)
span_id = attachment.id ? %{ id="attachment_#{attachment.id}"} : ""
# new lines inside our title cause problems with govspeak rendering as this is expected to be on one line.
title = (attachment.title || "").tr("\n", " ")
link = attachment.link(title, attachment.url)
attributes = attachment.attachment_attributes.empty? ? "" : " (#{attachment.attachment_attributes})"
%{#{link}#{attributes}}
end
extension('attachment image', /\[embed:attachments:image:\s*(.*?)\s*\]/) do |content_id|
attachment = attachments.detect { |a| a[:content_id] == content_id }
next "" unless attachment
attachment = AttachmentPresenter.new(attachment)
title = (attachment.title || "").tr("\n", " ")
render_image(attachment.url, title, nil, attachment.id)
end
# As of version 1.12.0 of Kramdown the block elements (div & figcaption)
# inside this html block will have it's < > converted into HTML Entities
# when ever this code is used inside block level elements.
#
# To resolve this we have a post-processing task that will convert this
# back into HTML (I know - it's ugly). The way we could resolve this
# without ugliness would be to output only inline elements which rules
# out div and figcaption
#
# This issue is not considered a bug by kramdown: https://github.com/gettalong/kramdown/issues/191
def render_image(url, alt_text, caption = nil, id = nil)
id_attr = id ? %{ id="attachment_#{id}"} : ""
lines = []
lines << %{'
lines.join
end
wrap_with_div('summary', '$!')
wrap_with_div('form-download', '$D')
wrap_with_div('contact', '$C')
wrap_with_div('place', '$P', Govspeak::Document)
wrap_with_div('information', '$I', Govspeak::Document)
wrap_with_div('additional-information', '$AI')
wrap_with_div('example', '$E', Govspeak::Document)
wrap_with_div('call-to-action', '$CTA', Govspeak::Document)
extension('address', surrounded_by("$A")) { |body|
%{\n
\n#{body.sub("\n", "").gsub("\n", " ")}\n
\n}
}
extension("legislative list", /(?<=\A|\n\n|\r\n\r\n)^\$LegislativeList\s*$(.*?)\$EndLegislativeList/m) do |body|
Govspeak::KramdownOverrides.with_kramdown_ordered_lists_disabled do
Kramdown::Document.new(body.strip).to_html.tap do |doc|
doc.gsub!('
', '')
doc.gsub!('
', '')
doc.sub!('', '')
end
end
end
extension("numbered list", /^[ \t]*((s\d+\.\s.*(?:\n|$))+)/) do |body|
steps ||= 0
body.gsub!(/s(\d+)\.\s(.*)(?:\n|$)/) do |b|
"
#{Govspeak::Document.new($2.strip).to_html}
\n"
end
%{\n#{body}}
end
def self.devolved_options
{ 'scotland' => 'Scotland',
'england' => 'England',
'england-wales' => 'England and Wales',
'northern-ireland' => 'Northern Ireland',
'wales' => 'Wales',
'london' => 'London' }
end
devolved_options.each do |k,v|
extension("devolved-#{k}",/:#{k}:(.*?):#{k}:/m) do |body|
%{
This section applies to #{v}
#{Govspeak::Document.new(body.strip).to_html}
\n}
end
end
extension("Priority list", /(?<=\A|\n\n|\r\n\r\n)^\$PriorityList:(\d+)\s*$(.*?)(?:^\s*$|\Z)/m) do |number_to_show, body|
number_to_show = number_to_show.to_i
tagged = 0
Govspeak::Document.new(body.strip).to_html.gsub(/
/) do |match|
if tagged < number_to_show
tagged += 1
'
'
else
match
end
end
end
extension('embed link', /\[embed:link:\s*(.*?)\s*\]/) do |content_id|
link = links.detect { |l| l[:content_id] == content_id }
next "" unless link
if link[:url]
"[#{link[:title]}](#{link[:url]})"
else
link[:title]
end
end
def render_hcard_address(contact)
HCardPresenter.from_contact(contact).render
end
private :render_hcard_address
extension('Contact', /\[Contact:\s*(.*?)\s*\]/) do |content_id|
contact = contacts.detect { |c| c[:content_id] == content_id }
next "" unless contact
contact = ContactPresenter.new(contact)
@renderer ||= ERB.new(File.read(__dir__ + '/templates/contact.html.erb'))
@renderer.result(binding)
end
end
end