formatter.rb in slodown-0.2.0

- old
+ new

@@ -1,50 +1,92 @@
 module Slodown
+  # This is the base Formatter class provided by Slodown. It works right
+  # out of the box if you want to use exactly the functionality provided by
+  # it, but in most projects, you'll probably want to create a new class
+  # inheriting from this one.
+  #
   class Formatter
+    # Builds a formatter for the given input.
+    #
+    # source - any object; it is converted with #to_s and the resulting string
+    #          is kept both as @source and as the working text @current.
+    #
     def initialize(source)
       @current = @source = source.to_s
     end
 
-    # Runs the entire pipeline.
+    # Run the entire pipeline in a sane order.
+    # Metadata lines are extracted first, then the text is converted from
+    # Markdown to HTML, URLs are auto-linked, and the result is sanitized.
+    # Returns self.
     #
     def complete
-      markdown.autolink.sanitize
+      extract_metadata.markdown.autolink.sanitize
     end
 
     # Convert the current document state from Markdown into HTML.
+    # Kramdown is configured through #kramdown_options. Returns self so that
+    # further steps can be chained.
     #
     def markdown
-      @current = Kramdown::Document.new(@current).to_slodown_html
-      self
+      convert do |current|
+        Kramdown::Document.new(current, kramdown_options).to_slodown_html
+      end
     end
 
     # Auto-link URLs through Rinku.
+    # Replaces the current text with the output of Rinku.auto_link and
+    # returns self so that further steps can be chained.
     #
     def autolink
-      @current = Rinku.auto_link(@current)
-      self
+      convert do |current|
+        Rinku.auto_link(current)
+      end
     end
 
     # Sanitize HTML tags.
+    # Cleans the current text with Sanitize using the rules returned by
+    # #sanitize_config. Returns self so that further steps can be chained.
     #
     def sanitize
-      @current = Sanitize.clean(@current, sanitize_config)
-      self
+      convert do |current|
+        Sanitize.clean(current, sanitize_config)
+      end
     end
 
+    # Strip leading "#+key: value" lines from the current text and collect
+    # them as metadata.
+    #
+    # Only the uninterrupted run of matching lines at the very top of the text
+    # is consumed; the first non-matching line and everything after it stay in
+    # the document. Keys are stored as symbols. Any previously extracted
+    # metadata is discarded. Returns self so that further steps can be chained.
+    #
+    def extract_metadata
+      @metadata = {}
+
+      convert do |current|
+        body = current.each_line.drop_while do |line|
+          header = /^#\+([a-z_]+): (.*)/.match(line)
+          next false unless header
+
+          # "." also matches "\r", so drop the carriage return that CRLF
+          # line endings would otherwise leave at the end of the value.
+          @metadata[header[1].to_sym] = header[2].chomp
+          true
+        end
+
+        body.join('')
+      end
+    end
+
+    # Return a hash with the extracted metadata.
+    # Keys are symbols taken from the "#+key: value" header lines. The hash is
+    # empty (rather than nil) when #extract_metadata has not run yet or found
+    # no metadata lines.
+    #
+    def metadata
+      @metadata ||= {}
+    end
+
+    # Returns the text in its current state, i.e. after whatever pipeline
+    # steps have been applied so far.
+    #
     def to_s
       @current
     end
 
   private
 
+    # Replaces the working text with the result of the given block.
+    # The block is called with the current text; whatever it returns becomes
+    # the new current text. Returns self so that calls can be chained.
+    #
+    def convert(&step)
+      tap { @current = step.call(@current) }
+    end
+
+    # Options handed to Kramdown::Document.new by #markdown.
+    # Selects 'coderay' as the syntax highlighter and passes it no extra
+    # highlighter options.
+    #
+    def kramdown_options
+      highlighter_opts = {}
+
+      { syntax_highlighter: 'coderay', syntax_highlighter_opts: highlighter_opts }
+    end
+
     def sanitize_config
       {
         elements: %w(
-          p a span sub sup strong em div hr abbr
+          p br a span sub sup strong em div hr abbr s
           ul ol li
-          blockquote pre code
+          blockquote pre code kbd
           h1 h2 h3 h4 h5 h6
           img object param del
         ),
         attributes: {
           :all     => ['class', 'style', 'title', 'id'],
@@ -64,10 +106,45 @@
           'embed' => {'src'  => ['http', 'https']},
           'object' => {'src'  => ['http', 'https']},
           'li' => {'id' => ['fn']},
           'sup' => {'id' => ['fnref']}
         },
-        transformers: EmbedTransformer
+        transformers: transformers
       }
+    end
+
+    # Regular expression that is matched against the host of an iframe/embed
+    # element's src URL. By default everything is allowed; override this to
+    # return a stricter expression.
+    #
+    def allowed_iframe_hosts
+      %r{.*}
+    end
+
+    # List of transformers that #sanitize_config hands to Sanitize.
+    # By default it holds only the iframe/embed transformer.
+    #
+    def transformers
+      iframe_and_embed = embed_transformer
+      [iframe_and_embed]
+    end
+
+    # Builds the Sanitize transformer that decides whether an <iframe> or
+    # <embed> element may stay in the document.
+    #
+    # The returned lambda receives Sanitize's env hash. It returns nil for
+    # nodes that are already whitelisted, are not elements, or are not
+    # iframe/embed tags. Otherwise the host of the src URL is matched against
+    # #allowed_iframe_hosts; on a match the node is cleaned down to a small
+    # attribute set and reported back through :node_whitelist.
+    #
+    def embed_transformer
+      lambda do |env|
+        node      = env[:node]
+        node_name = env[:node_name]
+
+        # We're fine with a bunch of stuff -- but not <iframe> and <embed> tags.
+        return if env[:is_whitelisted] || !node.element?
+        return unless %w[iframe embed].include?(node_name)
+
+        # We're dealing with an <iframe> or <embed> tag! Let's check its src
+        # attribute. A missing or malformed src must not abort sanitizing the
+        # whole document, so it is treated like a URL without a host.
+        host = begin
+          URI(node['src'].to_s).host
+        rescue URI::InvalidURIError
+          nil
+        end
+
+        # If the host name matches our regular expression, we can whitelist it.
+        return unless host =~ allowed_iframe_hosts
+
+        Sanitize.clean_node!(node, {
+          elements: %w[iframe embed],
+          attributes: {
+            all: %w[allowfullscreen frameborder height src width]
+          }
+        })
+
+        { node_whitelist: [node] }
+      end
+    end
   end
 end