require 'kramdown'
require 'govspeak/header_extractor'
require 'govspeak/structured_header_extractor'
require 'govspeak/html_validator'
require 'govspeak/html_sanitizer'
require 'kramdown/parser/kramdown_with_automatic_external_links'
require 'htmlentities'

module Govspeak
  # Wraps a piece of source text and renders it through Kramdown after
  # running every registered extension over it (see #preprocess and
  # .extension). Also exposes sanitized-HTML, plain-text, validation and
  # header-extraction views of the same document.
  class Document
    # Kramdown parser class used for rendering.
    Parser = Kramdown::Parser::KramdownWithAutomaticExternalLinks
    # Last segment of the parser's class name; passed to Kramdown as the
    # default :input option in #initialize.
    PARSER_CLASS_NAME = Parser.name.split("::").last

    # Registry of [title, regexp, block] triples added via .extension and
    # applied, in registration order, by #preprocess. This is a class
    # variable, so it is shared by every instance of Document.
    @@extensions = []

    # Starts as an empty array (see #initialize); nothing in the visible
    # code fills it. NOTE(review): presumably populated by callers.
    attr_accessor :images

    # Convenience wrapper: builds a Document and renders it in one call.
    # Takes the same arguments as #initialize and returns the result of
    # #to_html.
    def self.to_html(source, options = {})
      new(source, options).to_html
    end

    # source  - text to render; nil/false is treated as an empty string.
    # options - hash merged over the defaults below (caller values win);
    #           the merged hash is handed to Kramdown::Document.new.
    def initialize(source, options = {})
      # Work on a copy: #preprocess rewrites the string in place with gsub!,
      # and that must not leak back into the caller's object.
      @source = source ? source.dup : ""
      @options = {input: PARSER_CLASS_NAME, entity_output: :symbolic}.merge(options)
      @images = []
    end

    # Lazily builds and memoizes the Kramdown document from the preprocessed
    # source, so the extensions run at most once per instance.
    def kramdown_doc
      @kramdown_doc ||= Kramdown::Document.new(preprocess(@source), @options)
    end
    private :kramdown_doc

    # Returns the HTML produced by the memoized Kramdown document.
    def to_html
      kramdown_doc.to_html
    end

    # Returns #to_html passed through HtmlSanitizer#sanitize.
    def to_sanitized_html
      HtmlSanitizer.new(to_html).sanitize
    end

    # Returns #to_html passed through HtmlSanitizer#sanitize_without_images.
    def to_sanitized_html_without_images
      HtmlSanitizer.new(to_html).sanitize_without_images
    end

    # Plain-text rendering: every run of HTML tags and/or whitespace in the
    # rendered HTML is collapsed to a single space, the ends are stripped,
    # and HTML entities are then decoded.
    def to_text
      HTMLEntities.new.decode(to_html.gsub(/(?:<[^>]+>|\s)+/, " ").strip)
    end

    # Delegates to Govspeak::HtmlValidator, passing @source.
    # NOTE(review): #preprocess mutates @source in place, so the text being
    # validated differs depending on whether the document has already been
    # rendered -- confirm that this is intended.
    def valid?
      Govspeak::HtmlValidator.new(@source).valid?
    end

    # Returns the first element of what Govspeak::HeaderExtractor.convert
    # yields for the Kramdown document.
    def headers
      Govspeak::HeaderExtractor.convert(kramdown_doc).first
    end

    # Returns the result of running Govspeak::StructuredHeaderExtractor on
    # this document.
    def structured_headers
      Govspeak::StructuredHeaderExtractor.new(self).call
    end

    # Applies every registered extension to +source+, in place.
    #
    # For each extension, every match of its regexp is replaced by the value
    # of its block. The block is run with instance_exec, so +self+ inside it
    # is this Document, and it receives the first capture group ($1) as its
    # only argument. +title+ and +match+ are not used here.
    #
    # Returns the same (mutated) string object it was given.
    def preprocess(source)
      @@extensions.each do |title,regexp,block|
        source.gsub!(regexp) {|match|
          instance_exec($1, &block)
        }
      end
      source
    end

    # HTML-entity-encodes +text+ using HTMLEntities.
    def encode(text)
      HTMLEntities.new.encode(text)
    end
    private :encode

    # Registers an extension that #preprocess will apply.
    #
    # title  - name of the extension; also used to build the default regexp.
    # regexp - pattern to replace. When omitted, it matches
    #          "{::title}...{:/title}" with a non-greedy body; the /m flag
    #          lets the body span newlines. +title+ is interpolated into the
    #          pattern without escaping.
    # block  - receives the first capture group and returns the replacement.
    def self.extension(title, regexp = nil, &block)
      regexp ||= %r${::#{title}}(.*?){:/#{title}}$m
      @@extensions << [title, regexp, block]
    end

    # Builds a multi-line regexp that captures (non-greedily) the text that
    # follows the +open+ marker. Both markers are regexp-escaped, and the
    # match must begin at a \r, a \n or the start of a line.
    #
    # With +close+: the capture ends at +close+, followed by optional spaces
    # and an optional \r, \n or end of line.
    # Without +close+: the capture ends at an optional repeat of +open+,
    # followed by optional spaces and a mandatory \r, \n or end of line.
    def self.surrounded_by(open, close=nil)
      open = Regexp::escape(open)
      if close
        close = Regexp::escape(close)
        %r+(?:\r|\n|^)#{open}(.*?)#{close} *(\r|\n|$)?+m
      else
        %r+(?:\r|\n|^)#{open}(.*?)#{open}? *(\r|\n|$)+m
      end
    end

    # Registers an extension named +class_name+ that matches text marked
    # with +character+ (see .surrounded_by).
    #
    # parser - class used to render the captured body to HTML; pass a falsy
    #          value to use the stripped body as-is.
    def self.wrap_with_div(class_name, character, parser=Kramdown::Document)
      extension(class_name, surrounded_by(character)) { |body|
        # The ternary started here continues on the following line.
        content = parser ? 
parser.new("#{body.strip}\n").to_html : body.strip %{\n
(.*)<\/p>$/,"
\\1
") end extension('reverse') { |body| body.reverse } extension('highlight-answer') { |body| %{\n\n\n#{body.sub("\n", "").gsub("\n", "
")}\n
This section applies to #{v}