lib/govspeak.rb in govspeak-6.7.1 vs lib/govspeak.rb in govspeak-6.7.2

- old
+ new

@@ -60,22 +60,34 @@ @locale = options.fetch(:locale, "en") @options = { input: PARSER_CLASS_NAME, sanitize: true, syntax_highlighter: nil }.merge(options) @options[:entity_output] = :symbolic + @footnote_definition_html = nil + @acronyms = [] end def to_html @to_html ||= begin - html = if @options[:sanitize] - HtmlSanitizer.new(kramdown_doc.to_html).sanitize(allowed_elements: @allowed_elements) - else - kramdown_doc.to_html - end + html = if @options[:sanitize] + HtmlSanitizer.new(kramdown_doc.to_html).sanitize(allowed_elements: @allowed_elements) + else + kramdown_doc.to_html + end - Govspeak::PostProcessor.process(html, self) - end + unless @footnote_definition_html.nil? + regex = /<div class="footnotes".*[<\/div>]/m + + if html.scan(regex).empty? + html << @footnote_definition_html + else + html.gsub!(regex, @footnote_definition_html) + end + end + + Govspeak::PostProcessor.process(html, self) + end end def to_liquid to_html end @@ -108,18 +120,55 @@ end def preprocess(source) source = Govspeak::BlockquoteExtraQuoteRemover.remove(source) source = remove_forbidden_characters(source) + + legislative_list_footnote_definitions(source) + self.class.extensions.each do |_, regexp, block| source.gsub!(regexp) do instance_exec(*Regexp.last_match.captures, &block) end end source end + def legislative_list_footnote_definitions(source) + is_legislative_list = source.scan(/\$LegislativeList.*?\[\^\d\]*.*?\$EndLegislativeList/m).size.positive? + footnotes = source.scan(/\[\^(\d)\]:(.*)/) + @acronyms = source.scan(/(?<=\*)\[(.*)\]:(.*)/) + + if is_legislative_list && footnotes.size.positive? + list_items = footnotes.map do |footnote| + number = footnote[0] + text = footnote[1].strip + footnote_definition = Govspeak::Document.new(text).to_html[/(?<=<p>).*(?=<\/p>)/] + + <<~HTML_SNIPPET + <li id="fn:#{number}" role="doc-endnote"> + <p> + #{footnote_definition}<a href="#fnref:#{number}" class="reversefootnote" role="doc-backlink" aria-label="go to where this is referenced">↩</a> + </p> + </li> + HTML_SNIPPET + end + + @footnote_definition_html = <<~HTML_CONTAINER + <div class="footnotes" role="doc-endnotes"> + <ol> + #{list_items.join.strip} + </ol> + </div> + HTML_CONTAINER + end + + unless @footnote_definition_html.nil? && @acronyms.size.positive? + add_acronym_alt_text(@footnote_definition_html) + end + end + def remove_forbidden_characters(source) # These are characters that are not deemed not suitable for # markup: https://www.w3.org/TR/unicode-xml/#Charlist source.gsub(Sanitize::REGEX_UNSUITABLE_CHARS, "") end @@ -159,11 +208,11 @@ \] # match end of link markdown \( # match start of link text markdown ([^)]+) # capture inside of link text markdown \) # match end of link text markdown \s* # any whitespace between opening bracket and link - {\/button} # match ending bracket + {/button} # match ending bracket (?:\r|\n|$) # non-capturing match to make sure end of line and linebreak }x) do |attributes, text, href| button_classes = "govuk-button" /cross-domain-tracking:(?<cross_domain_tracking>.[^\s*]+)/ =~ attributes data_attribute = "" @@ -293,10 +342,22 @@ Govspeak::KramdownOverrides.with_kramdown_ordered_lists_disabled do Kramdown::Document.new(body.strip).to_html.tap do |doc| doc.gsub!("<ul>", "<ol>") doc.gsub!("</ul>", "</ol>") doc.sub!("<ol>", '<ol class="legislative-list">') + + footnotes = body.scan(/\[\^(\d+)\]/).flatten + + footnotes.each do |footnote| + html = "<sup id=\"fnref:#{footnote}\" role=\"doc-noteref\">" \ + "<a href=\"#fn:#{footnote}\" class=\"footnote\" rel=\"footnote\">" \ + "[footnote #{footnote}]</a></sup>" + + doc.sub!(/(\[\^#{footnote}\])/, html) + end + + add_acronym_alt_text(doc) if @acronyms.size.positive? end end end extension("numbered list", /^[ \t]*((s\d+\.\s.*(?:\n|$))+)/) do |body| @@ -383,9 +444,15 @@ @kramdown_doc ||= Kramdown::Document.new(preprocess(@source), @options) end def encode(text) HTMLEntities.new.encode(text) + end + + def add_acronym_alt_text(html) + @acronyms.each do |acronym| + html.gsub!(acronym[0], "<abbr title=\"#{acronym[1].strip}\">#{acronym[0]}</abbr>") + end end end end I18n.load_path.unshift(