require 'kramdown'
require 'govspeak/header_extractor'
require 'govspeak/html_validator'
require 'govspeak/html_sanitizer'
require 'kramdown/parser/kramdown_with_automatic_external_links'
require 'htmlentities'

module Govspeak
  # Converts Govspeak source (kramdown plus the extensions registered
  # below) into HTML, sanitized HTML, or plain text.
  #
  # Extensions are registered at class-definition time via .extension and
  # applied, in registration order, by #preprocess before the text is handed
  # to kramdown.
  #
  # NOTE(review): the HTML tags and class attributes inside the string
  # literals of this class were reconstructed after being lost from the
  # copy under review -- confirm them against the rendered-output tests.
  class Document
    Parser = Kramdown::Parser::KramdownWithAutomaticExternalLinks
    PARSER_CLASS_NAME = Parser.name.split("::").last

    # Registered extensions as [title, regexp, block] triples.
    @@extensions = []

    # Images that the "!!<n>" attached-image extension looks up
    # (1-based in the source text).
    attr_accessor :images

    # Convenience wrapper: builds a document and renders it to HTML.
    def self.to_html(source, options = {})
      new(source, options).to_html
    end

    # source  - the Govspeak text; nil is treated as an empty string.
    # options - options merged over the defaults and passed to
    #           Kramdown::Document. :document_domains is removed from them
    #           and assigned to Parser.document_domains (class-level state).
    def initialize(source, options = {})
      @source = source ? source.dup : ""
      # Work on a copy so the caller's hash does not lose :document_domains.
      options = options.dup
      Parser.document_domains = options.delete(:document_domains)
      @options = {input: PARSER_CLASS_NAME, entity_output: :symbolic}.merge(options)
      @images = []
    end

    # Memoized kramdown document built from the preprocessed source.
    def kramdown_doc
      # preprocess rewrites its argument in place; hand it a copy so that
      # @source (also read by #valid?) keeps the original text.
      @kramdown_doc ||= Kramdown::Document.new(preprocess(@source.dup), @options)
    end
    private :kramdown_doc

    # Renders the document to HTML.
    def to_html
      kramdown_doc.to_html
    end

    # Renders the document to HTML and passes it through HtmlSanitizer.
    def to_sanitized_html
      HtmlSanitizer.new(to_html).sanitize
    end

    # Renders the document as plain text: every run of tags and whitespace
    # collapses to a single space, then HTML entities are decoded.
    def to_text
      HTMLEntities.new.decode(to_html.gsub(/(?:<[^>]+>|\s)+/, " ").strip)
    end

    # Delegates validation of the original source to Govspeak::HtmlValidator.
    def valid?
      Govspeak::HtmlValidator.new(@source).valid?
    end

    # Returns the first element of what HeaderExtractor produces for this
    # document.
    def headers
      Govspeak::HeaderExtractor.convert(kramdown_doc).first
    end

    # Applies every registered extension to +source+, in registration order.
    # Each match is replaced by the result of running the extension's block
    # in the context of this document with the first capture group as its
    # argument.
    #
    # Mutates +source+ in place and returns it.
    def preprocess(source)
      @@extensions.each do |_title, regexp, block|
        source.gsub!(regexp) { instance_exec($1, &block) }
      end
      source
    end

    # HTML-entity-encodes +text+.
    def encode(text)
      HTMLEntities.new.encode(text)
    end
    private :encode

    # Registers an extension.
    #
    # title  - name of the extension.
    # regexp - pattern whose first capture group is handed to the block;
    #          defaults to matching "{::title}...{:/title}" across lines.
    # block  - receives the captured text and returns the replacement.
    def self.extension(title, regexp = nil, &block)
      regexp ||= %r${::#{title}}(.*?){:/#{title}}$m
      @@extensions << [title, regexp, block]
    end

    # Builds a multiline regexp that captures the text following +open+ at
    # the start of the string or of a line.
    #
    # With +close+ given, the capture ends at +close+. Without it, the
    # capture ends at an optional repeat of +open+ followed by optional
    # spaces and a line end.
    def self.surrounded_by(open, close=nil)
      open = Regexp::escape(open)
      if close
        close = Regexp::escape(close)
        %r+(?:\r|\n|^)#{open}(.*?)#{close} *(\r|\n|$)?+m
      else
        %r+(?:\r|\n|^)#{open}(.*?)#{open}? *(\r|\n|$)+m
      end
    end

    # Registers an extension that wraps text delimited by +character+ in a
    # div carrying +class_name+.
    #
    # parser - class used to render the body to HTML; pass nil to insert
    #          the stripped body unrendered.
    def self.wrap_with_div(class_name, character, parser=Kramdown::Document)
      extension(class_name, surrounded_by(character)) { |body|
        content = parser ? parser.new("#{body.strip}\n").to_html : body.strip
        %{\n<div class="#{class_name}">\n#{content}</div>\n}
      }
    end

    # Renders +body+ with +parser+ and wraps the contents of the first
    # single-line paragraph in <strong>.
    def insert_strong_inside_p(body, parser=Kramdown::Document)
      parser.new(body.strip).to_html.sub(/^<p>(.*)<\/p>$/,"<p><strong>\\1</strong></p>")
    end

    extension('reverse') { |body|
      body.reverse
    }

    extension('highlight-answer') { |body|
      %{\n\n<div class="highlight-answer">\n#{Kramdown::Document.new(body.strip).to_html}</div>\n}
    }

    # FIXME: these surrounded_by arguments look dodgy
    extension('external', surrounded_by("x[", ")x")) { |body|
      Kramdown::Document.new("[#{body.strip}){:rel='external'}").to_html
    }

    extension('informational', surrounded_by("^")) { |body|
      %{\n\n<div class="application-notice info-notice">\n#{Kramdown::Document.new(body.strip).to_html}</div>\n}
    }

    extension('important', surrounded_by("@")) { |body|
      %{\n\n<div class="advisory">#{insert_strong_inside_p(body)}</div>\n}
    }

    extension('helpful', surrounded_by("%")) { |body|
      %{\n\n<div class="application-notice help-notice">\n#{Kramdown::Document.new(body.strip).to_html}</div>\n}
    }

    # "!!<n>" at the start of a line embeds the n-th entry of #images
    # (1-based); a number with no matching image renders as nothing.
    extension('attached-image', /^!!([0-9]+)/) do |image_number|
      image = images[image_number.to_i - 1]
      if image
        # Best effort: any error raised while reading the caption is
        # treated as "no caption".
        caption = image.caption rescue nil
        render_image(image.url, image.alt_text, caption)
      else
        ""
      end
    end

    # Builds the figure markup for an embedded image. The figcaption line
    # is emitted only when +caption+ is present and not blank.
    def render_image(url, alt_text, caption = nil)
      lines = []
      lines << '<figure class="image embedded">'
      lines << %Q{  <div class="img"><img alt="#{encode(alt_text)}" src="#{encode(url)}" /></div>}
      lines << %Q{  <figcaption>#{encode(caption.strip)}</figcaption>} if caption && !caption.strip.empty?
      lines << '</figure>'
      lines.join "\n"
    end

    wrap_with_div('summary', '$!')
    wrap_with_div('form-download', '$D')
    wrap_with_div('contact', '$C')
    wrap_with_div('place', '$P', Govspeak::Document)
    wrap_with_div('information', '$I', Govspeak::Document)
    wrap_with_div('additional-information', '$AI')
    wrap_with_div('example', '$E', Govspeak::Document)
    wrap_with_div('call-to-action', '$CTA', Govspeak::Document)

    # The first newline of the body is dropped; every remaining newline
    # becomes a line break.
    extension('address', surrounded_by("$A")) { |body|
      %{<div class="address"><div class="adr org fn"><p>\n#{body.sub("\n", "").gsub("\n", "<br />")}\n</p></div></div>\n}
    }

    # A run of consecutive "s<digits>. text" lines becomes one ordered
    # list, each line rendered through kramdown as a list item.
    extension("numbered list", /((s\d+\.\s.*(?:\n|$))+)/) do |body|
      body.gsub!(/s(\d+)\.\s(.*)(?:\n|$)/) do
        "<li>#{Kramdown::Document.new($2.strip).to_html}</li>\n"
      end
      %{<ol class="steps">\n#{body}</ol>}
    end

    # Maps each devolved-content marker key to the label shown in its header.
    def self.devolved_options
      { 'scotland' => 'Scotland',
        'england' => 'England',
        'england-wales' => 'England and Wales',
        'northern-ireland' => 'Northern Ireland',
        'wales' => 'Wales',
        'london' => 'London' }
    end

    # One extension per devolved option: ":key: ... :key:" renders a header
    # naming the region followed by the kramdown-rendered body.
    devolved_options.each do |k,v|
      extension("devolved-#{k}",/:#{k}:(.*?):#{k}:/m) do |body|
        %{<div class="devolved-content #{k}">\n<p class="devolved-header">This section applies to #{v}</p>\n<div class="devolved-body">#{Kramdown::Document.new(body.strip).to_html}</div>\n</div>\n}
      end
    end
  end
end