(function () { // A quick way to make sure we're only keeping span-level tags when we need to. // This isn't supposed to be foolproof. It's just a quick way to make sure we // keep all span-level tags returned by a pagedown converter. It should allow // all span-level tags through, with or without attributes. var inlineTags = new RegExp(['^(<\\/?(a|abbr|acronym|applet|area|b|basefont|', 'bdo|big|button|cite|code|del|dfn|em|figcaption|', 'font|i|iframe|img|input|ins|kbd|label|map|', 'mark|meter|object|param|progress|q|ruby|rp|rt|s|', 'samp|script|select|small|span|strike|strong|', 'sub|sup|textarea|time|tt|u|var|wbr)[^>]*>|', '<(br)\\s?\\/?>)$'].join(''), 'i'); /****************************************************************** * Utility Functions * *****************************************************************/ // patch for ie7 if (!Array.indexOf) { Array.prototype.indexOf = function(obj) { for (var i = 0; i < this.length; i++) { if (this[i] == obj) { return i; } } return -1; }; } function trim(str) { return str.replace(/^\s+|\s+$/g, ''); } function rtrim(str) { return str.replace(/\s+$/g, ''); } // Remove one level of indentation from text. Indent is 4 spaces. function outdent(text) { return text.replace(new RegExp('^(\\t|[ ]{1,4})', 'gm'), ''); } function contains(str, substr) { return str.indexOf(substr) != -1; } // Sanitize html, removing tags that aren't in the whitelist function sanitizeHtml(html, whitelist) { return html.replace(/<[^>]*>?/gi, function(tag) { return tag.match(whitelist) ? tag : ''; }); } // Merge two arrays, keeping only unique elements. function union(x, y) { var obj = {}; for (var i = 0; i < x.length; i++) obj[x[i]] = x[i]; for (i = 0; i < y.length; i++) obj[y[i]] = y[i]; var res = []; for (var k in obj) { if (obj.hasOwnProperty(k)) res.push(obj[k]); } return res; } // JS regexes don't support \A or \Z, so we add sentinels, as Pagedown // does. 
// In this case, we add the ascii codes for start of text (STX) and
  // end of text (ETX), an idea borrowed from:
  // https://github.com/tanakahisateru/js-markdown-extra
  function addAnchors(text) {
    if (text.charAt(0) != '\x02')
      text = '\x02' + text;
    if (text.charAt(text.length - 1) != '\x03')
      text = text + '\x03';
    return text;
  }

  // Remove STX and ETX sentinels.
  function removeAnchors(text) {
    if (text.charAt(0) == '\x02')
      text = text.slice(1);
    if (text.charAt(text.length - 1) == '\x03')
      text = text.slice(0, -1);
    return text;
  }

  // Convert markdown within an element, retaining only span-level tags
  function convertSpans(text, extra) {
    return sanitizeHtml(convertAll(text, extra), inlineTags);
  }

  // Convert internal markdown using the stock pagedown converter
  function convertAll(text, extra) {
    var result = extra.blockGamutHookCallback(text);
    // We need to perform these operations since we skip the steps in the converter
    result = unescapeSpecialChars(result);
    // "$$" is the replacement pattern for one literal "$".
    result = result.replace(/~D/g, "$$").replace(/~T/g, "~");
    result = extra.previousPostConversion(result);
    return result;
  }

  // Convert escaped special characters
  function processEscapesStep1(text) {
    // Markdown extra adds two escapable characters, `:` and `|`
    return text.replace(/\\\|/g, '~I').replace(/\\:/g, '~i');
  }

  // Turn the `~I` / `~i` placeholders written by processEscapesStep1 back
  // into `|` and `:`.
  function processEscapesStep2(text) {
    return text.replace(/~I/g, '|').replace(/~i/g, ':');
  }

  // Duplicated from PageDown converter
  function unescapeSpecialChars(text) {
    // Swap back in all the special characters we've hidden.
    text = text.replace(/~E(\d+)E/g, function(wholeMatch, m1) {
      // Explicit radix, matching the other parseInt calls in this file.
      var charCodeToReplace = parseInt(m1, 10);
      return String.fromCharCode(charCodeToReplace);
    });
    return text;
  }

  // Build a lowercase, dash-separated slug from `text`.
  function slugify(text) {
    return text.toLowerCase()
      .replace(/\s+/g, '-')      // Replace spaces with -
      .replace(/[^\w\-]+/g, '')  // Remove all non-word chars
      .replace(/\-\-+/g, '-')    // Replace multiple - with single -
      .replace(/^-+/, '')        // Trim - from start of text
      .replace(/-+$/, '');       // Trim - from end of text
  }

  /*****************************************************************************
   * Markdown.Extra                                                            *
   ****************************************************************************/

  Markdown.Extra = function() {
    // For converting internal markdown (in tables for instance).
    // This is necessary since these methods are meant to be called as
    // preConversion hooks, and the Markdown converter passed to init()
    // won't convert any markdown contained in the html tags we return.
    this.converter = null;

    // Stores html blocks we generate in hooks so that
    // they're not destroyed if the user is using a sanitizing converter
    this.hashBlocks = [];

    // Stores footnotes
    this.footnotes = {};
    this.usedFootnotes = [];

    // Special attribute blocks for fenced code blocks and headers enabled.
    this.attributeBlocks = false;

    // Fenced code block options
    this.googleCodePrettify = false;
    this.highlightJs = false;

    // Table options
    this.tableClass = '';

    this.tabWidth = 4;
  };

  // Attach a new Markdown.Extra instance to `converter` by chaining onto its
  // postNormalization, preBlockGamut, postSpanGamut and postConversion hooks.
  //
  // `options` (optional) may carry:
  //   extensions  - array of extension names; defaults to ["all"], which is
  //                 expanded to every extension listed below. The array is
  //                 written back onto the caller's `options` object.
  //   highlighter - 'prettify' or 'highlight'
  //   table_class - class string stored for generated tables
  //
  // Returns the Markdown.Extra instance.
  Markdown.Extra.init = function(converter, options) {
    // Each call to init creates a new instance of Markdown.Extra so it's
    // safe to have multiple converters, with different options, on a single page
    var extra = new Markdown.Extra();
    var postNormalizationTransformations = [];
    var preBlockGamutTransformations = [];
    var postSpanGamutTransformations = [];
    var postConversionTransformations = ["unHashExtraBlocks"];

    options = options || {};
    options.extensions = options.extensions || ["all"];
    if (contains(options.extensions, "all")) {
      options.extensions = ["tables", "fenced_code_gfm", "def_list", "attr_list", "footnotes", "smartypants", "strikethrough", "newlines"];
    }
    preBlockGamutTransformations.push("wrapHeaders");
    if (contains(options.extensions, "attr_list")) {
      postNormalizationTransformations.push("hashFcbAttributeBlocks");
      preBlockGamutTransformations.push("hashHeaderAttributeBlocks");
      postConversionTransformations.push("applyAttributeBlocks");
      extra.attributeBlocks = true;
    }
    if (contains(options.extensions, "fenced_code_gfm")) {
      // This step will convert fcb inside list items and blockquotes
      preBlockGamutTransformations.push("fencedCodeBlocks");
      // This extra step is to prevent html blocks hashing and link definition/footnotes stripping inside fcb
      postNormalizationTransformations.push("fencedCodeBlocks");
    }
    if (contains(options.extensions, "tables")) {
      preBlockGamutTransformations.push("tables");
    }
    if (contains(options.extensions, "def_list")) {
      preBlockGamutTransformations.push("definitionLists");
    }
    if (contains(options.extensions, "footnotes")) {
      postNormalizationTransformations.push("stripFootnoteDefinitions");
      preBlockGamutTransformations.push("doFootnotes");
      postConversionTransformations.push("printFootnotes");
    }
    if (contains(options.extensions, "smartypants")) {
      postConversionTransformations.push("runSmartyPants");
    }
    if (contains(options.extensions, "strikethrough")) {
      postSpanGamutTransformations.push("strikethrough");
    }
    if (contains(options.extensions, "newlines")) {
      postSpanGamutTransformations.push("newlines");
    }

    converter.hooks.chain("postNormalization", function(text) {
      return extra.doTransform(postNormalizationTransformations, text) + '\n';
    });

    converter.hooks.chain("preBlockGamut", function(text, blockGamutHookCallback) {
      // Keep a reference to the block gamut callback to run recursively
      extra.blockGamutHookCallback = blockGamutHookCallback;
      text = processEscapesStep1(text);
      text = extra.doTransform(preBlockGamutTransformations, text) + '\n';
      text = processEscapesStep2(text);
      return text;
    });

    converter.hooks.chain("postSpanGamut", function(text) {
      return extra.doTransform(postSpanGamutTransformations, text);
    });

    // Keep a reference to the hook chain running before doPostConversion to apply on hashed extra blocks
    extra.previousPostConversion = converter.hooks.postConversion;
    converter.hooks.chain("postConversion", function(text) {
      text = extra.doTransform(postConversionTransformations, text);
      // Clear state vars that may use unnecessary memory
      extra.hashBlocks = [];
      extra.footnotes = {};
      extra.usedFootnotes = [];
      return text;
    });

    if ("highlighter" in options) {
      extra.googleCodePrettify = options.highlighter === 'prettify';
      extra.highlightJs = options.highlighter === 'highlight';
    }

    if ("table_class" in options) {
      extra.tableClass = options.table_class;
    }

    extra.converter = converter;

    // Caller usually won't need this, but it's handy for testing.
    return extra;
  };

  // Do transformations
  // `transformations` is a list of method names on this instance; each is
  // called in order with the output of the previous one.
  Markdown.Extra.prototype.doTransform = function(transformations, text) {
    for (var i = 0; i < transformations.length; i++)
      text = this[transformations[i]](text);
    return text;
  };

  // Return a placeholder containing a key, which is the block's index in the
  // hashBlocks array. We wrap our output in a
// <p> tag here so Pagedown won't.
  Markdown.Extra.prototype.hashExtraBlock = function(block) {
    return '\n<p>~X' + (this.hashBlocks.push(block) - 1) + 'X</p>\n';
  };

  // Same as hashExtraBlock, but the placeholder is emitted without the
  // surrounding paragraph and newlines.
  Markdown.Extra.prototype.hashExtraInline = function(block) {
    return '~X' + (this.hashBlocks.push(block) - 1) + 'X';
  };

  // Replace placeholder blocks in `text` with their corresponding
  // html blocks in the hashBlocks array.
  Markdown.Extra.prototype.unHashExtraBlocks = function(text) {
    var self = this;
    var hasHash;
    // A restored block may itself contain placeholders, so keep substituting
    // until a pass finds none.
    do {
      hasHash = false;
      text = text.replace(/(?:<p>)?~X(\d+)X(?:<\/p>)?/g, function(wholeMatch, m1) {
        hasHash = true;
        var key = parseInt(m1, 10);
        return self.hashBlocks[key];
      });
    } while (hasHash === true);
    return text;
  };

  // Wrap headers to make sure they won't be in def lists
  Markdown.Extra.prototype.wrapHeaders = function(text) {
    function wrap(text) {
      return '\n' + text + '\n';
    }
    text = text.replace(/^.+[ \t]*\n=+[ \t]*\n+/gm, wrap);
    text = text.replace(/^.+[ \t]*\n-+[ \t]*\n+/gm, wrap);
    text = text.replace(/^\#{1,6}[ \t]*.+?[ \t]*\#*\n+/gm, wrap);
    return text;
  };


  /******************************************************************
   * Attribute Blocks                                               *
   *****************************************************************/

  // TODO: use sentinels. Should we just add/remove them in doConversion?
  // TODO: better matches for id / class attributes
  // The end-of-text alternative is the ETX sentinel character (\x03), not
  // the four-character text "0x03".
  var attrBlock = "\\{[ \\t]*((?:[#.][-_:a-zA-Z0-9]+[ \\t]*)+)\\}";
  var hdrAttributesA = new RegExp("^(#{1,6}.*#{0,6})[ \\t]+" + attrBlock + "[ \\t]*(?:\\n|\\x03)", "gm");
  var hdrAttributesB = new RegExp("^(.*)[ \\t]+" + attrBlock + "[ \\t]*\\n" +
    "(?=[\\-|=]+\\s*(?:\\n|\\x03))", "gm"); // underline lookahead
  var fcbAttributes = new RegExp("^(```[ \\t]*[^{\\s]*)[ \\t]+" + attrBlock + "[ \\t]*\\n" +
    "(?=([\\s\\S]*?)\\n```[ \\t]*(\\n|\\x03))", "gm");

  // Extract headers attribute blocks, move them above the element they will be
  // applied to, and hash them for later.
  Markdown.Extra.prototype.hashHeaderAttributeBlocks = function(text) {
    var self = this;
    // `pre` is the header text, `attr` the content between the braces.
    function attributeCallback(wholeMatch, pre, attr) {
      return '<p>~XX' + (self.hashBlocks.push(attr) - 1) + 'XX</p>\n' + pre + "\n";
    }

    text = text.replace(hdrAttributesA, attributeCallback);  // ## headers
    text = text.replace(hdrAttributesB, attributeCallback);  // underline headers
    return text;
  };

  // Extract FCB attribute blocks, move them above the element they will be
  // applied to, and hash them for later.
  Markdown.Extra.prototype.hashFcbAttributeBlocks = function(text) {
    // TODO: use sentinels. Should we just add/remove them in doConversion?
    // TODO: better matches for id / class attributes

    var self = this;
    // `pre` is the opening fence line, `attr` the content between the braces.
    function attributeCallback(wholeMatch, pre, attr) {
      return '<p>~XX' + (self.hashBlocks.push(attr) - 1) + 'XX</p>\n' + pre + "\n";
    }

    return text.replace(fcbAttributes, attributeCallback);
  };

  // Find each hashed attribute placeholder that is followed by an <h1>-<h6>
  // or <pre> element and rewrite that element's opening tag with the id and
  // classes stored for the placeholder. The placeholder itself is removed.
  Markdown.Extra.prototype.applyAttributeBlocks = function(text) {
    var self = this;
    var blockRe = new RegExp('<p>~XX(\\d+)XX</p>[\\s]*' +
                             '(?:<(h[1-6]|pre)(?: +class="(\\S+)")?(>[\\s\\S]*?</\\2>))', "gm");
    text = text.replace(blockRe, function(wholeMatch, k, tag, cls, rest) {
      if (!tag) // no following header or fenced code block.
        return '';

      // get attributes list from hash
      var key = parseInt(k, 10);
      var attributes = self.hashBlocks[key];

      // get id (only the first one is used)
      var id = attributes.match(/#[^\s#.]+/g) || [];
      var idStr = id[0] ? ' id="' + id[0].slice(1) + '"' : '';

      // get classes and merge with existing classes
      var classes = attributes.match(/\.[^\s#.]+/g) || [];
      for (var i = 0; i < classes.length; i++) // Remove leading dot
        classes[i] = classes[i].slice(1);

      var classStr = '';
      if (cls)
        classes = union(classes, [cls]);

      if (classes.length > 0)
        classStr = ' class="' + classes.join(' ') + '"';

      return "<" + tag + idStr + classStr + rest;
    });

    return text;
  };

  /******************************************************************
   * Tables                                                         *
   *****************************************************************/

  // Find and convert Markdown Extra tables into html.
Markdown.Extra.prototype.tables = function(text) { var self = this; var leadingPipe = new RegExp( ['^' , '[ ]{0,3}' , // Allowed whitespace '[|]' , // Initial pipe '(.+)\\n' , // $1: Header Row '[ ]{0,3}' , // Allowed whitespace '[|]([ ]*[-:]+[-| :]*)\\n' , // $2: Separator '(' , // $3: Table Body '(?:[ ]*[|].*\\n?)*' , // Table rows ')', '(?:\\n|$)' // Stop at final newline ].join(''), 'gm' ); var noLeadingPipe = new RegExp( ['^' , '[ ]{0,3}' , // Allowed whitespace '(\\S.*[|].*)\\n' , // $1: Header Row '[ ]{0,3}' , // Allowed whitespace '([-:]+[ ]*[|][-| :]*)\\n' , // $2: Separator '(' , // $3: Table Body '(?:.*[|].*\\n?)*' , // Table rows ')' , '(?:\\n|$)' // Stop at final newline ].join(''), 'gm' ); text = text.replace(leadingPipe, doTable); text = text.replace(noLeadingPipe, doTable); // $1 = header, $2 = separator, $3 = body function doTable(match, header, separator, body, offset, string) { // remove any leading pipes and whitespace header = header.replace(/^ *[|]/m, ''); separator = separator.replace(/^ *[|]/m, ''); body = body.replace(/^ *[|]/gm, ''); // remove trailing pipes and whitespace header = header.replace(/[|] *$/m, ''); separator = separator.replace(/[|] *$/m, ''); body = body.replace(/[|] *$/gm, ''); // determine column alignments alignspecs = separator.split(/ *[|] */); align = []; for (var i = 0; i < alignspecs.length; i++) { var spec = alignspecs[i]; if (spec.match(/^ *-+: *$/m)) align[i] = ' style="text-align:right;"'; else if (spec.match(/^ *:-+: *$/m)) align[i] = ' style="text-align:center;"'; else if (spec.match(/^ *:-+ *$/m)) align[i] = ' style="text-align:left;"'; else align[i] = ''; } // TODO: parse spans in header and rows before splitting, so that pipes // inside of tags are not interpreted as separators var headers = header.split(/ *[|] */); var colCount = headers.length; // build html var cls = self.tableClass ? ' class="' + self.tableClass + '"' : ''; var html = ['", headerHtml, " | \n"].join(''); } html += "
", colHtml, " | \n"].join(''); } html += "
encodeCode(codeblock), '
// replace codeblock with placeholder until postConversion step
return self.hashExtraBlock(html);
return text;
* SmartyPants *
// Apply applyPants to the text portions of `text`, skipping HTML comments
// and recursing into (or skipping) the content of matched elements.
// Updates this.smartyPantsLastChar as output is produced and returns the
// converted text.
Markdown.Extra.prototype.educatePants = function(text) {
  var self = this;
  var result = '';
  var blockOffset = 0;
  // Here we parse HTML in a very bad manner
  // First alternative: an HTML comment (no capture groups set).
  // Second alternative: an opening tag, its content and the matching close tag.
  text.replace(/(?:<!--[\s\S]*?-->)|(<)([a-zA-Z1-6]+)([^\n]*?>)([\s\S]*?)(<\/\2>)/g, function(wholeMatch, m1, m2, m3, m4, m5, offset) {
    // Convert the plain text between the previous match and this one.
    var token = text.substring(blockOffset, offset);
    result += self.applyPants(token);
    self.smartyPantsLastChar = result.substring(result.length - 1);
    blockOffset = offset + wholeMatch.length;
    if(!m1) {
      // Skip commentary
      result += wholeMatch;
      return;
    }
    // Skip special tags
    if(!/code|kbd|pre|script|noscript|iframe|math|ins|del|pre/i.test(m2)) {
      m4 = self.educatePants(m4);
    }
    else {
      self.smartyPantsLastChar = m4.substring(m4.length - 1);
    }
    result += m1 + m2 + m3 + m4 + m5;
  });
  // Convert whatever follows the last match.
  var lastToken = text.substring(blockOffset);
  result += self.applyPants(lastToken);
  self.smartyPantsLastChar = result.substring(result.length - 1);
  return result;
};
// String.replace callback: returns the first captured group `m1` with the
// numeric entities for curly quotes, dashes and ellipsis turned back into
// their plain ASCII spellings. `wholeMatch` is unused.
function revertPants(wholeMatch, m1) {
  var blockText = m1;
  blockText = blockText.replace(/&\#8220;/g, "\"");
  blockText = blockText.replace(/&\#8221;/g, "\"");
  blockText = blockText.replace(/&\#8216;/g, "'");
  blockText = blockText.replace(/&\#8217;/g, "'");
  blockText = blockText.replace(/&\#8212;/g, "---");
  blockText = blockText.replace(/&\#8211;/g, "--");
  blockText = blockText.replace(/&\#8230;/g, "...");
  return blockText;
}
Markdown.Extra.prototype.applyPants = function(text) {
// Dashes
text = text.replace(/---/g, "—").replace(/--/g, "–");
// Ellipses
text = text.replace(/\.\.\./g, "…").replace(/\.\s\.\s\./g, "…");
// Backticks
text = text.replace(/``/g, "“").replace (/''/g, "”");
if(/^'$/.test(text)) {
// Special case: single-character ' token
if(/\S/.test(this.smartyPantsLastChar)) {
return "’";
return "‘";
if(/^"$/.test(text)) {
// Special case: single-character " token
if(/\S/.test(this.smartyPantsLastChar)) {
return "”";
return "“";
// Special case if the very first character is a quote
// followed by punctuation at a non-word-break. Close the quotes by brute force:
text = text.replace (/^'(?=[!"#\$\%'()*+,\-.\/:;<=>?\@\[\\]\^_`{|}~]\B)/, "’");
text = text.replace (/^"(?=[!"#\$\%'()*+,\-.\/:;<=>?\@\[\\]\^_`{|}~]\B)/, "”");
// Special case for double sets of quotes, e.g.:
// He said, "'Quoted' words in a larger quote."
text = text.replace(/"'(?=\w)/g, "“‘"); text = text.replace(/'"(?=\w)/g, "‘“"); // Special case for decade abbreviations (the '80s): text = text.replace(/'(?=\d{2}s)/g, "’"); // Get most opening single quotes: text = text.replace(/(\s| |--|&[mn]dash;|&\#8211;|&\#8212;|&\#x201[34];)'(?=\w)/g, "$1‘"); // Single closing quotes: text = text.replace(/([^\s\[\{\(\-])'/g, "$1’"); text = text.replace(/'(?=\s|s\b)/g, "’"); // Any remaining single quotes should be opening ones: text = text.replace(/'/g, "‘"); // Get most opening double quotes: text = text.replace(/(\s| |--|&[mn]dash;|&\#8211;|&\#8212;|&\#x201[34];)"(?=\w)/g, "$1“"); // Double closing quotes: text = text.replace(/([^\s\[\{\(\-])"/g, "$1”"); text = text.replace(/"(?=\s)/g, "”"); // Any remaining quotes should be opening ones. text = text.replace(/"/ig, "“"); return text; }; // Find and convert markdown extra definition lists into html. Markdown.Extra.prototype.runSmartyPants = function(text) { this.smartyPantsLastChar = ''; text = this.educatePants(text); // Clean everything inside html tags (some of them may have been converted due to our rough html parsing) text = text.replace(/(<([a-zA-Z1-6]+)\b([^\n>]*?)(\/)?>)/g, revertPants); return text; }; /****************************************************************** * Definition Lists * ******************************************************************/ // Find and convert markdown extra definition lists into html. Markdown.Extra.prototype.definitionLists = function(text) { var wholeList = new RegExp( ['(\\x02\\n?|\\n\\n)' , '(?:' , '(' , // $1 = whole list '(' , // $2 '[ ]{0,3}' , '((?:[ \\t]*\\S.*\\n)+)', // $3 = defined term '\\n?' , '[ ]{0,3}:[ ]+' , // colon starting definition ')' , '([\\s\\S]+?)' , '(' , // $4 '(?=\\0x03)' , // \z '|' , '(?=' , '\\n{2,}' , '(?=\\S)' , '(?!' , // Negative lookahead for another term '[ ]{0,3}' , '(?:\\S.*\\n)+?' , // defined term '\\n?' , '[ ]{0,3}:[ ]+' , // colon starting definition ')' , '(?!' 
, // Negative lookahead for another definition '[ ]{0,3}:[ ]+' , // colon starting definition ')' , ')' , ')' , ')' , ')' ].join(''), 'gm' ); var self = this; text = addAnchors(text); text = text.replace(wholeList, function(match, pre, list) { var result = trim(self.processDefListItems(list)); result = "