lib/slodown/formatter.rb in slodown-0.1.3 vs lib/slodown/formatter.rb in slodown-0.2.0
- old
+ new
@@ -1,50 +1,92 @@
module Slodown
+ # This is the base Formatter class provided by Slodown. It works right
+ # out of the box if you want to use exactly the functionality provided by
+ # it, but in most projects, you'll probably want to create a new class
+ # inheriting from this one.
+ #
class Formatter
# Set up a formatter for the given source.
#
# The argument is converted with to_s. The resulting string is kept in
# @source and is also the initial @current text (both refer to the same
# String object).
#
def initialize(source)
  text = source.to_s
  @source = text
  @current = text
end
- # Runs the entire pipeline.
+ # Run the entire pipeline in a sane order.
#
def complete
# Every stage returns the formatter itself (see convert), so the stages chain
# and this method returns the formatter as well. In the added (+) line
# extract_metadata runs first, so the "#+key: value" header is removed from the
# text before markdown sees it; sanitize runs last, on the HTML produced by the
# earlier stages.
- markdown.autolink.sanitize
+ extract_metadata.markdown.autolink.sanitize
end
# Convert the current document state from Markdown into HTML.
#
def markdown
# The removed (-) lines assigned @current and returned self by hand; the added
# (+) lines delegate both steps to convert. The added call also passes
# kramdown_options as second argument to Kramdown::Document.new.
# NOTE(review): to_slodown_html is not defined in this listing -- presumably a
# Slodown-specific Kramdown converter; confirm where it lives.
- @current = Kramdown::Document.new(@current).to_slodown_html
- self
+ convert do |current|
+ Kramdown::Document.new(current, kramdown_options).to_slodown_html
+ end
end
# Auto-link URLs through Rinku.
#
def autolink
# Replaces the current text with the result of Rinku.auto_link on it and
# returns the formatter. The added (+) lines route the assignment and the
# `self` return through convert instead of spelling them out.
- @current = Rinku.auto_link(@current)
- self
+ convert do |current|
+ Rinku.auto_link(current)
+ end
end
# Sanitize HTML tags.
#
def sanitize
# Replaces the current text with the result of Sanitize.clean, called with the
# configuration hash built by sanitize_config, and returns the formatter. The
# added (+) lines route the assignment and the `self` return through convert.
- @current = Sanitize.clean(@current, sanitize_config)
- self
+ convert do |current|
+ Sanitize.clean(current, sanitize_config)
+ end
end
+ def extract_metadata
# Consume the leading run of "#+key: value" lines. Each such line is recorded
# in @metadata under key.to_sym and dropped from the text; the first line that
# does not match ends the run, and that line plus everything after it becomes
# the new current text. @metadata is reset to {} on every call, and a repeated
# key overwrites the earlier value. Returns the formatter (via convert).
+ @metadata = {}
+
+ convert do |current|
+ current.each_line.drop_while do |line|
# `next false` makes drop_while stop dropping at the first non-metadata line.
# Keys are limited to lowercase letters and underscores by the pattern.
+ next false if line !~ /^#\+([a-z_]+): (.*)/
+
# The hash assignment below is the block's value. A String is always truthy in
# Ruby (even ""), so a matching line is always dropped.
# NOTE(review): `.` also matches "\r", so with CRLF input the stored value
# would keep a trailing "\r" -- confirm whether that matters to callers.
+ key, value = $1, $2
+ @metadata[key.to_sym] = value
+ end.join('')
+ end
+ end
+
+ # Return a hash with the extracted metadata
+ #
+ def metadata
# @metadata is only assigned inside extract_metadata (initialize does not set
# it), so this returns nil until extract_metadata -- or complete, which runs
# it -- has been called.
# NOTE(review): if callers expect a Hash unconditionally, `@metadata ||= {}`
# would be the safer reader -- confirm against callers before changing.
+ @metadata
+ end
+
# Return the text in its current state: the string stored by initialize,
# replaced by the result of each conversion stage run so far.
#
def to_s
  @current
end
private
+ # Applies a conversion of the current text state.
+ #
+ def convert(&blk)
# Calls the block with the current text, stores the block's result as the new
# current text, and returns self so that stage methods can be chained.
# Calling this without a block fails, because blk is then nil.
+ @current = blk.call(@current)
+ self
+ end
+
+ def kramdown_options
# Options hash handed to Kramdown::Document.new by the markdown stage. It
# names 'coderay' as the syntax highlighter and passes it an empty options
# hash. A new Hash is built on every call.
+ {
+ syntax_highlighter: 'coderay',
+ syntax_highlighter_opts: {
+ }
+ }
+ end
+
def sanitize_config
{
elements: %w(
- p a span sub sup strong em div hr abbr
+ p br a span sub sup strong em div hr abbr s
ul ol li
- blockquote pre code
+ blockquote pre code kbd
h1 h2 h3 h4 h5 h6
img object param del
),
attributes: {
:all => ['class', 'style', 'title', 'id'],
@@ -64,10 +106,45 @@
'embed' => {'src' => ['http', 'https']},
'object' => {'src' => ['http', 'https']},
'li' => {'id' => ['fn']},
'sup' => {'id' => ['fnref']}
},
- transformers: EmbedTransformer
+ transformers: transformers
}
+ end
+
+ def allowed_iframe_hosts
+ # By default, allow everything. Override this to return a regular expression
+ # that will be matched against the iframe/embed's src URL's host.
# embed_transformer applies it as `uri.host =~ allowed_iframe_hosts`. /.*/ is
# unanchored and matches any string, including the empty one.
# NOTE(review): because =~ matches anywhere in the host, a restrictive
# override should anchor its pattern with \A and \z.
+ /.*/
+ end
+
+ def transformers
# Array used for the :transformers entry of sanitize_config. It contains only
# the lambda built by embed_transformer.
+ [embed_transformer]
+ end
+
+ def embed_transformer
+ lambda do |env|
+ node = env[:node]
+ node_name = env[:node_name]
+
+ # We're fine with a bunch of stuff -- but not <iframe> and <embed> tags.
+ return if env[:is_whitelisted] || !env[:node].element?
+ return unless %w[iframe embed].include? env[:node_name]
+
+ # We're dealing with an <iframe> or <embed> tag! Let's check its src attribute.
+ # If its host name matches our regular expression, we can whitelist it.
+ uri = URI(env[:node]['src'])
+ return unless uri.host =~ allowed_iframe_hosts
+
+ Sanitize.clean_node!(node, {
+ elements: %w[iframe embed],
+ attributes: {
+ all: %w[allowfullscreen frameborder height src width]
+ }
+ })
+
+ { node_whitelist: [node] }
+ end
end
end
end