lib/slodown/formatter.rb in slodown-0.1.3 vs lib/slodown/formatter.rb in slodown-0.2.0

- old
+ new

@@ -1,50 +1,92 @@ module Slodown + # This is the base Formatter class provided by Slodown. It works right + # out of the box if you want to use exactly the functionality provided by + # it, but in most projects, you'll probably want to create a new class + # inheriting from this one. + # class Formatter def initialize(source) @current = @source = source.to_s end - # Runs the entire pipeline. + # Run the entire pipeline in a sane order. # def complete - markdown.autolink.sanitize + extract_metadata.markdown.autolink.sanitize end # Convert the current document state from Markdown into HTML. # def markdown - @current = Kramdown::Document.new(@current).to_slodown_html - self + convert do |current| + Kramdown::Document.new(current, kramdown_options).to_slodown_html + end end # Auto-link URLs through Rinku. # def autolink - @current = Rinku.auto_link(@current) - self + convert do |current| + Rinku.auto_link(current) + end end # Sanitize HTML tags. # def sanitize - @current = Sanitize.clean(@current, sanitize_config) - self + convert do |current| + Sanitize.clean(current, sanitize_config) + end end + # Remove the leading run of `#+key: value` lines from the current text and + # store each pair in @metadata under a symbol key. The hash is reset on + # every call. + # + def extract_metadata + @metadata = {} + + convert do |current| + current.each_line.drop_while do |line| + # The first non-matching line ends the metadata header. + next false if line !~ /^#\+([a-z_]+): (.*)/ + + key, value = $1, $2 + @metadata[key.to_sym] = value + end.join('') + end + end + + # Return a hash with the extracted metadata (nil until extract_metadata + # has run, because @metadata is only assigned there). + # + def metadata + @metadata + end + def to_s @current end private + # Applies a conversion of the current text state. 
+ # The block is called with the current text and its return value becomes the + # new current text. Returns self so that steps can be chained. + # + def convert(&blk) + @current = blk.call(@current) + self + end + + # Options passed to Kramdown::Document.new by #markdown. + # + def kramdown_options + { + syntax_highlighter: 'coderay', + syntax_highlighter_opts: { + } + } + end + def sanitize_config { elements: %w( - p a span sub sup strong em div hr abbr + p br a span sub sup strong em div hr abbr s ul ol li - blockquote pre code + blockquote pre code kbd h1 h2 h3 h4 h5 h6 img object param del ), attributes: { :all => ['class', 'style', 'title', 'id'], @@ -64,10 +106,45 @@ 'embed' => {'src' => ['http', 'https']}, 'object' => {'src' => ['http', 'https']}, 'li' => {'id' => ['fn']}, 'sup' => {'id' => ['fnref']} }, - transformers: EmbedTransformer + transformers: transformers } + end + + def allowed_iframe_hosts + # By default, allow everything. Override this to return a regular expression + # that will be matched against the iframe/embed's src URL's host. + /.*/ + end + + # Transformers referenced by the transformers: entry of sanitize_config. + # + def transformers + [embed_transformer] + end + + # Build a lambda that lets an <iframe> or <embed> node through when the host + # of its src URL matches allowed_iframe_hosts. + # + def embed_transformer + lambda do |env| + node = env[:node] + node_name = env[:node_name] + + # We're fine with a bunch of stuff -- but not <iframe> and <embed> tags. + return if env[:is_whitelisted] || !env[:node].element? + return unless %w[iframe embed].include? env[:node_name] + + # We're dealing with an <iframe> or <embed> tag! Let's check its src attribute. + # If its host name matches our regular expression, we can whitelist it. + # NOTE(review): URI() raises for a missing or malformed src -- confirm + # whether that should be rescued here instead of aborting sanitize. + uri = URI(env[:node]['src']) + return unless uri.host =~ allowed_iframe_hosts + + Sanitize.clean_node!(node, { + elements: %w[iframe embed], + attributes: { + all: %w[allowfullscreen frameborder height src width] + } + }) + + { node_whitelist: [node] } + end end end end