app/assets/javascripts/markdown.js in rails-bootstrap-markdown-1.0.0 vs app/assets/javascripts/markdown.js in rails-bootstrap-markdown-2.6.0

- old
+ new

@@ -1,1642 +1,1740 @@ // Released under MIT license // Copyright (c) 2009-2010 Dominic Baggott // Copyright (c) 2009-2010 Ash Berlin // Copyright (c) 2011 Christoph Dorn <christoph@christophdorn.com> (http://www.christophdorn.com) +// Date: 2013-09-15T16:12Z -/*jshint browser:true, devel:true */ +(function(expose) { -(function( expose ) { -/** - * class Markdown - * - * Markdown processing in Javascript done right. We have very particular views - * on what constitutes 'right' which include: - * - * - produces well-formed HTML (this means that em and strong nesting is - * important) - * - * - has an intermediate representation to allow processing of parsed data (We - * in fact have two, both as [JsonML]: a markdown tree and an HTML tree). - * - * - is easily extensible to add new dialects without having to rewrite the - * entire parsing mechanics - * - * - has a good test suite - * - * This implementation fulfills all of these (except that the test suite could - * do with expanding to automatically run all the fixtures from other Markdown - * implementations.) - * - * ##### Intermediate Representation - * - * *TODO* Talk about this :) Its JsonML, but document the node names we use. - * - * [JsonML]: http://jsonml.org/ "JSON Markup Language" - **/ -var Markdown = expose.Markdown = function(dialect) { - switch (typeof dialect) { + + + var MarkdownHelpers = {}; + + // For Spidermonkey based engines + function mk_block_toSource() { + return "Markdown.mk_block( " + + uneval(this.toString()) + + ", " + + uneval(this.trailing) + + ", " + + uneval(this.lineNumber) + + " )"; + } + + // node + function mk_block_inspect() { + var util = require("util"); + return "Markdown.mk_block( " + + util.inspect(this.toString()) + + ", " + + util.inspect(this.trailing) + + ", " + + util.inspect(this.lineNumber) + + " )"; + + } + + MarkdownHelpers.mk_block = function(block, trail, line) { + // Be helpful for default case in tests. + if ( arguments.length === 1 ) + trail = "\n\n"; + + // We actually need a String object, not a string primitive + /* jshint -W053 */ + var s = new String(block); + s.trailing = trail; + // To make it clear its not just a string + s.inspect = mk_block_inspect; + s.toSource = mk_block_toSource; + + if ( line !== undefined ) + s.lineNumber = line; + + return s; + }; + + + var isArray = MarkdownHelpers.isArray = Array.isArray || function(obj) { + return Object.prototype.toString.call(obj) === "[object Array]"; + }; + + // Don't mess with Array.prototype. Its not friendly + if ( Array.prototype.forEach ) { + MarkdownHelpers.forEach = function forEach( arr, cb, thisp ) { + return arr.forEach( cb, thisp ); + }; + } + else { + MarkdownHelpers.forEach = function forEach(arr, cb, thisp) { + for (var i = 0; i < arr.length; i++) + cb.call(thisp || arr, arr[i], i, arr); + }; + } + + MarkdownHelpers.isEmpty = function isEmpty( obj ) { + for ( var key in obj ) { + if ( hasOwnProperty.call( obj, key ) ) + return false; + } + return true; + }; + + MarkdownHelpers.extract_attr = function extract_attr( jsonml ) { + return isArray(jsonml) + && jsonml.length > 1 + && typeof jsonml[ 1 ] === "object" + && !( isArray(jsonml[ 1 ]) ) + ? jsonml[ 1 ] + : undefined; + }; + + + + + /** + * class Markdown + * + * Markdown processing in Javascript done right. We have very particular views + * on what constitutes 'right' which include: + * + * - produces well-formed HTML (this means that em and strong nesting is + * important) + * + * - has an intermediate representation to allow processing of parsed data (We + * in fact have two, both as [JsonML]: a markdown tree and an HTML tree). + * + * - is easily extensible to add new dialects without having to rewrite the + * entire parsing mechanics + * + * - has a good test suite + * + * This implementation fulfills all of these (except that the test suite could + * do with expanding to automatically run all the fixtures from other Markdown + * implementations.) + * + * ##### Intermediate Representation + * + * *TODO* Talk about this :) Its JsonML, but document the node names we use. + * + * [JsonML]: http://jsonml.org/ "JSON Markup Language" + **/ + var Markdown = function(dialect) { + switch (typeof dialect) { case "undefined": this.dialect = Markdown.dialects.Gruber; break; case "object": this.dialect = dialect; break; default: - if ( dialect in Markdown.dialects ) { + if ( dialect in Markdown.dialects ) this.dialect = Markdown.dialects[dialect]; - } - else { + else throw new Error("Unknown Markdown dialect '" + String(dialect) + "'"); - } break; - } - this.em_state = []; - this.strong_state = []; - this.debug_indent = ""; -}; + } + this.em_state = []; + this.strong_state = []; + this.debug_indent = ""; + }; -/** - * parse( markdown, [dialect] ) -> JsonML - * - markdown (String): markdown string to parse - * - dialect (String | Dialect): the dialect to use, defaults to gruber - * - * Parse `markdown` and return a markdown document as a Markdown.JsonML tree. - **/ -expose.parse = function( source, dialect ) { - // dialect will default if undefined - var md = new Markdown( dialect ); - return md.toTree( source ); -}; + /** + * Markdown.dialects + * + * Namespace of built-in dialects. + **/ + Markdown.dialects = {}; -/** - * toHTML( markdown, [dialect] ) -> String - * toHTML( md_tree ) -> String - * - markdown (String): markdown string to parse - * - md_tree (Markdown.JsonML): parsed markdown tree - * - * Take markdown (either as a string or as a JsonML tree) and run it through - * [[toHTMLTree]] then turn it into a well-formated HTML fragment. - **/ -expose.toHTML = function toHTML( source , dialect , options ) { - var input = expose.toHTMLTree( source , dialect , options ); - return expose.renderJsonML( input ); -}; -/** - * toHTMLTree( markdown, [dialect] ) -> JsonML - * toHTMLTree( md_tree ) -> JsonML - * - markdown (String): markdown string to parse - * - dialect (String | Dialect): the dialect to use, defaults to gruber - * - md_tree (Markdown.JsonML): parsed markdown tree - * - * Turn markdown into HTML, represented as a JsonML tree. If a string is given - * to this function, it is first parsed into a markdown tree by calling - * [[parse]]. - **/ -expose.toHTMLTree = function toHTMLTree( input, dialect , options ) { - // convert string input to an MD tree - if ( typeof input ==="string" ) input = this.parse( input, dialect ); - // Now convert the MD tree to an HTML tree + // Imported functions + var mk_block = Markdown.mk_block = MarkdownHelpers.mk_block, + isArray = MarkdownHelpers.isArray; - // remove references from the tree - var attrs = extract_attr( input ), - refs = {}; + /** + * parse( markdown, [dialect] ) -> JsonML + * - markdown (String): markdown string to parse + * - dialect (String | Dialect): the dialect to use, defaults to gruber + * + * Parse `markdown` and return a markdown document as a Markdown.JsonML tree. + **/ + Markdown.parse = function( source, dialect ) { + // dialect will default if undefined + var md = new Markdown( dialect ); + return md.toTree( source ); + }; - if ( attrs && attrs.references ) { - refs = attrs.references; + function count_lines( str ) { + var n = 0, + i = -1; + while ( ( i = str.indexOf("\n", i + 1) ) !== -1 ) + n++; + return n; } - var html = convert_tree_to_html( input, refs , options ); - merge_text_nodes( html ); - return html; -}; + // Internal - split source into rough blocks + Markdown.prototype.split_blocks = function splitBlocks( input ) { + input = input.replace(/(\r\n|\n|\r)/g, "\n"); + // [\s\S] matches _anything_ (newline or space) + // [^] is equivalent but doesn't work in IEs. + var re = /([\s\S]+?)($|\n#|\n(?:\s*\n|$)+)/g, + blocks = [], + m; -// For Spidermonkey based engines -function mk_block_toSource() { - return "Markdown.mk_block( " + - uneval(this.toString()) + - ", " + - uneval(this.trailing) + - ", " + - uneval(this.lineNumber) + - " )"; -} + var line_no = 1; -// node -function mk_block_inspect() { - var util = require("util"); - return "Markdown.mk_block( " + - util.inspect(this.toString()) + - ", " + - util.inspect(this.trailing) + - ", " + - util.inspect(this.lineNumber) + - " )"; + if ( ( m = /^(\s*\n)/.exec(input) ) !== null ) { + // skip (but count) leading blank lines + line_no += count_lines( m[0] ); + re.lastIndex = m[0].length; + } -} + while ( ( m = re.exec(input) ) !== null ) { + if (m[2] === "\n#") { + m[2] = "\n"; + re.lastIndex--; + } + blocks.push( mk_block( m[1], m[2], line_no ) ); + line_no += count_lines( m[0] ); + } -var mk_block = Markdown.mk_block = function(block, trail, line) { - // Be helpful for default case in tests. - if ( arguments.length == 1 ) trail = "\n\n"; + return blocks; + }; - var s = new String(block); - s.trailing = trail; - // To make it clear its not just a string - s.inspect = mk_block_inspect; - s.toSource = mk_block_toSource; + /** + * Markdown#processBlock( block, next ) -> undefined | [ JsonML, ... ] + * - block (String): the block to process + * - next (Array): the following blocks + * + * Process `block` and return an array of JsonML nodes representing `block`. + * + * It does this by asking each block level function in the dialect to process + * the block until one can. Succesful handling is indicated by returning an + * array (with zero or more JsonML nodes), failure by a false value. + * + * Blocks handlers are responsible for calling [[Markdown#processInline]] + * themselves as appropriate. + * + * If the blocks were split incorrectly or adjacent blocks need collapsing you + * can adjust `next` in place using shift/splice etc. + * + * If any of this default behaviour is not right for the dialect, you can + * define a `__call__` method on the dialect that will get invoked to handle + * the block processing. + */ + Markdown.prototype.processBlock = function processBlock( block, next ) { + var cbs = this.dialect.block, + ord = cbs.__order__; - if ( line != undefined ) - s.lineNumber = line; + if ( "__call__" in cbs ) + return cbs.__call__.call(this, block, next); - return s; -}; + for ( var i = 0; i < ord.length; i++ ) { + //D:this.debug( "Testing", ord[i] ); + var res = cbs[ ord[i] ].call( this, block, next ); + if ( res ) { + //D:this.debug(" matched"); + if ( !isArray(res) || ( res.length > 0 && !( isArray(res[0]) ) ) ) + this.debug(ord[i], "didn't return a proper array"); + //D:this.debug( "" ); + return res; + } + } -function count_lines( str ) { - var n = 0, i = -1; - while ( ( i = str.indexOf("\n", i + 1) ) !== -1 ) n++; - return n; -} + // Uhoh! no match! Should we throw an error? + return []; + }; -// Internal - split source into rough blocks -Markdown.prototype.split_blocks = function splitBlocks( input, startLine ) { - input = input.replace(/(\r\n|\n|\r)/g, "\n"); - // [\s\S] matches _anything_ (newline or space) - var re = /([\s\S]+?)($|\n(?:\s*\n|$)+)/g, - blocks = [], - m; + Markdown.prototype.processInline = function processInline( block ) { + return this.dialect.inline.__call__.call( this, String( block ) ); + }; - var line_no = 1; + /** + * Markdown#toTree( source ) -> JsonML + * - source (String): markdown source to parse + * + * Parse `source` into a JsonML tree representing the markdown document. + **/ + // custom_tree means set this.tree to `custom_tree` and restore old value on return + Markdown.prototype.toTree = function toTree( source, custom_root ) { + var blocks = source instanceof Array ? source : this.split_blocks( source ); - if ( ( m = /^(\s*\n)/.exec(input) ) != null ) { - // skip (but count) leading blank lines - line_no += count_lines( m[0] ); - re.lastIndex = m[0].length; - } + // Make tree a member variable so its easier to mess with in extensions + var old_tree = this.tree; + try { + this.tree = custom_root || this.tree || [ "markdown" ]; - while ( ( m = re.exec(input) ) !== null ) { - blocks.push( mk_block( m[1], m[2], line_no ) ); - line_no += count_lines( m[0] ); - } + blocks_loop: + while ( blocks.length ) { + var b = this.processBlock( blocks.shift(), blocks ); - return blocks; -}; + // Reference blocks and the like won't return any content + if ( !b.length ) + continue blocks_loop; -/** - * Markdown#processBlock( block, next ) -> undefined | [ JsonML, ... ] - * - block (String): the block to process - * - next (Array): the following blocks - * - * Process `block` and return an array of JsonML nodes representing `block`. - * - * It does this by asking each block level function in the dialect to process - * the block until one can. Succesful handling is indicated by returning an - * array (with zero or more JsonML nodes), failure by a false value. - * - * Blocks handlers are responsible for calling [[Markdown#processInline]] - * themselves as appropriate. - * - * If the blocks were split incorrectly or adjacent blocks need collapsing you - * can adjust `next` in place using shift/splice etc. - * - * If any of this default behaviour is not right for the dialect, you can - * define a `__call__` method on the dialect that will get invoked to handle - * the block processing. - */ -Markdown.prototype.processBlock = function processBlock( block, next ) { - var cbs = this.dialect.block, - ord = cbs.__order__; + this.tree.push.apply( this.tree, b ); + } + return this.tree; + } + finally { + if ( custom_root ) + this.tree = old_tree; + } + }; - if ( "__call__" in cbs ) { - return cbs.__call__.call(this, block, next); - } + // Noop by default + Markdown.prototype.debug = function () { + var args = Array.prototype.slice.call( arguments); + args.unshift(this.debug_indent); + if ( typeof print !== "undefined" ) + print.apply( print, args ); + if ( typeof console !== "undefined" && typeof console.log !== "undefined" ) + console.log.apply( null, args ); + }; - for ( var i = 0; i < ord.length; i++ ) { - //D:this.debug( "Testing", ord[i] ); - var res = cbs[ ord[i] ].call( this, block, next ); - if ( res ) { - //D:this.debug(" matched"); - if ( !isArray(res) || ( res.length > 0 && !( isArray(res[0]) ) ) ) - this.debug(ord[i], "didn't return a proper array"); - //D:this.debug( "" ); - return res; + Markdown.prototype.loop_re_over_block = function( re, block, cb ) { + // Dont use /g regexps with this + var m, + b = block.valueOf(); + + while ( b.length && (m = re.exec(b) ) !== null ) { + b = b.substr( m[0].length ); + cb.call(this, m); } - } + return b; + }; - // Uhoh! no match! Should we throw an error? - return []; -}; + // Build default order from insertion order. + Markdown.buildBlockOrder = function(d) { + var ord = []; + for ( var i in d ) { + if ( i === "__order__" || i === "__call__" ) + continue; + ord.push( i ); + } + d.__order__ = ord; + }; -Markdown.prototype.processInline = function processInline( block ) { - return this.dialect.inline.__call__.call( this, String( block ) ); -}; + // Build patterns for inline matcher + Markdown.buildInlinePatterns = function(d) { + var patterns = []; -/** - * Markdown#toTree( source ) -> JsonML - * - source (String): markdown source to parse - * - * Parse `source` into a JsonML tree representing the markdown document. - **/ -// custom_tree means set this.tree to `custom_tree` and restore old value on return -Markdown.prototype.toTree = function toTree( source, custom_root ) { - var blocks = source instanceof Array ? source : this.split_blocks( source ); + for ( var i in d ) { + // __foo__ is reserved and not a pattern + if ( i.match( /^__.*__$/) ) + continue; + var l = i.replace( /([\\.*+?|()\[\]{}])/g, "\\$1" ) + .replace( /\n/, "\\n" ); + patterns.push( i.length === 1 ? l : "(?:" + l + ")" ); + } - // Make tree a member variable so its easier to mess with in extensions - var old_tree = this.tree; - try { - this.tree = custom_root || this.tree || [ "markdown" ]; + patterns = patterns.join("|"); + d.__patterns__ = patterns; + //print("patterns:", uneval( patterns ) ); - blocks: - while ( blocks.length ) { - var b = this.processBlock( blocks.shift(), blocks ); + var fn = d.__call__; + d.__call__ = function(text, pattern) { + if ( pattern !== undefined ) + return fn.call(this, text, pattern); + else + return fn.call(this, text, patterns); + }; + }; - // Reference blocks and the like won't return any content - if ( !b.length ) continue blocks; - this.tree.push.apply( this.tree, b ); - } - return this.tree; - } - finally { - if ( custom_root ) { - this.tree = old_tree; - } - } -}; -// Noop by default -Markdown.prototype.debug = function () { - var args = Array.prototype.slice.call( arguments); - args.unshift(this.debug_indent); - if ( typeof print !== "undefined" ) - print.apply( print, args ); - if ( typeof console !== "undefined" && typeof console.log !== "undefined" ) - console.log.apply( null, args ); -} -Markdown.prototype.loop_re_over_block = function( re, block, cb ) { - // Dont use /g regexps with this - var m, - b = block.valueOf(); + var extract_attr = MarkdownHelpers.extract_attr; - while ( b.length && (m = re.exec(b) ) != null ) { - b = b.substr( m[0].length ); - cb.call(this, m); - } - return b; -}; + /** + * renderJsonML( jsonml[, options] ) -> String + * - jsonml (Array): JsonML array to render to XML + * - options (Object): options + * + * Converts the given JsonML into well-formed XML. + * + * The options currently understood are: + * + * - root (Boolean): wether or not the root node should be included in the + * output, or just its children. The default `false` is to not include the + * root itself. + */ + Markdown.renderJsonML = function( jsonml, options ) { + options = options || {}; + // include the root element in the rendered output? + options.root = options.root || false; -/** - * Markdown.dialects - * - * Namespace of built-in dialects. - **/ -Markdown.dialects = {}; + var content = []; -/** - * Markdown.dialects.Gruber - * - * The default dialect that follows the rules set out by John Gruber's - * markdown.pl as closely as possible. Well actually we follow the behaviour of - * that script which in some places is not exactly what the syntax web page - * says. - **/ -Markdown.dialects.Gruber = { - block: { - atxHeader: function atxHeader( block, next ) { - var m = block.match( /^(#{1,6})\s*(.*?)\s*#*\s*(?:\n|$)/ ); + if ( options.root ) { + content.push( render_tree( jsonml ) ); + } + else { + jsonml.shift(); // get rid of the tag + if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) ) + jsonml.shift(); // get rid of the attributes - if ( !m ) return undefined; + while ( jsonml.length ) + content.push( render_tree( jsonml.shift() ) ); + } - var header = [ "header", { level: m[ 1 ].length } ]; - Array.prototype.push.apply(header, this.processInline(m[ 2 ])); + return content.join( "\n\n" ); + }; - if ( m[0].length < block.length ) - next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) ); - return [ header ]; - }, + /** + * toHTMLTree( markdown, [dialect] ) -> JsonML + * toHTMLTree( md_tree ) -> JsonML + * - markdown (String): markdown string to parse + * - dialect (String | Dialect): the dialect to use, defaults to gruber + * - md_tree (Markdown.JsonML): parsed markdown tree + * + * Turn markdown into HTML, represented as a JsonML tree. If a string is given + * to this function, it is first parsed into a markdown tree by calling + * [[parse]]. + **/ + Markdown.toHTMLTree = function toHTMLTree( input, dialect , options ) { - setextHeader: function setextHeader( block, next ) { - var m = block.match( /^(.*)\n([-=])\2\2+(?:\n|$)/ ); + // convert string input to an MD tree + if ( typeof input === "string" ) + input = this.parse( input, dialect ); - if ( !m ) return undefined; + // Now convert the MD tree to an HTML tree - var level = ( m[ 2 ] === "=" ) ? 1 : 2; - var header = [ "header", { level : level }, m[ 1 ] ]; + // remove references from the tree + var attrs = extract_attr( input ), + refs = {}; - if ( m[0].length < block.length ) - next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) ); + if ( attrs && attrs.references ) + refs = attrs.references; - return [ header ]; - }, + var html = convert_tree_to_html( input, refs , options ); + merge_text_nodes( html ); + return html; + }; - code: function code( block, next ) { - // | Foo - // |bar - // should be a code block followed by a paragraph. Fun - // - // There might also be adjacent code block to merge. + /** + * toHTML( markdown, [dialect] ) -> String + * toHTML( md_tree ) -> String + * - markdown (String): markdown string to parse + * - md_tree (Markdown.JsonML): parsed markdown tree + * + * Take markdown (either as a string or as a JsonML tree) and run it through + * [[toHTMLTree]] then turn it into a well-formated HTML fragment. + **/ + Markdown.toHTML = function toHTML( source , dialect , options ) { + var input = this.toHTMLTree( source , dialect , options ); - var ret = [], - re = /^(?: {0,3}\t| {4})(.*)\n?/, - lines; + return this.renderJsonML( input ); + }; - // 4 spaces + content - if ( !block.match( re ) ) return undefined; - block_search: - do { - // Now pull out the rest of the lines - var b = this.loop_re_over_block( - re, block.valueOf(), function( m ) { ret.push( m[1] ); } ); + function escapeHTML( text ) { + return text.replace( /&/g, "&amp;" ) + .replace( /</g, "&lt;" ) + .replace( />/g, "&gt;" ) + .replace( /"/g, "&quot;" ) + .replace( /'/g, "&#39;" ); + } - if ( b.length ) { - // Case alluded to in first comment. push it back on as a new block - next.unshift( mk_block(b, block.trailing) ); - break block_search; - } - else if ( next.length ) { - // Check the next block - it might be code too - if ( !next[0].match( re ) ) break block_search; + function render_tree( jsonml ) { + // basic case + if ( typeof jsonml === "string" ) + return escapeHTML( jsonml ); - // Pull how how many blanks lines follow - minus two to account for .join - ret.push ( block.trailing.replace(/[^\n]/g, "").substring(2) ); + var tag = jsonml.shift(), + attributes = {}, + content = []; - block = next.shift(); - } - else { - break block_search; - } - } while ( true ); + if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) ) + attributes = jsonml.shift(); - return [ [ "code_block", ret.join("\n") ] ]; - }, + while ( jsonml.length ) + content.push( render_tree( jsonml.shift() ) ); - horizRule: function horizRule( block, next ) { - // this needs to find any hr in the block to handle abutting blocks - var m = block.match( /^(?:([\s\S]*?)\n)?[ \t]*([-_*])(?:[ \t]*\2){2,}[ \t]*(?:\n([\s\S]*))?$/ ); + var tag_attrs = ""; + for ( var a in attributes ) + tag_attrs += " " + a + '="' + escapeHTML( attributes[ a ] ) + '"'; - if ( !m ) { - return undefined; - } + // be careful about adding whitespace here for inline elements + if ( tag === "img" || tag === "br" || tag === "hr" ) + return "<"+ tag + tag_attrs + "/>"; + else + return "<"+ tag + tag_attrs + ">" + content.join( "" ) + "</" + tag + ">"; + } - var jsonml = [ [ "hr" ] ]; + function convert_tree_to_html( tree, references, options ) { + var i; + options = options || {}; - // if there's a leading abutting block, process it - if ( m[ 1 ] ) { - jsonml.unshift.apply( jsonml, this.processBlock( m[ 1 ], [] ) ); - } + // shallow clone + var jsonml = tree.slice( 0 ); - // if there's a trailing abutting block, stick it into next - if ( m[ 3 ] ) { - next.unshift( mk_block( m[ 3 ] ) ); + if ( typeof options.preprocessTreeNode === "function" ) + jsonml = options.preprocessTreeNode(jsonml, references); + + // Clone attributes if they exist + var attrs = extract_attr( jsonml ); + if ( attrs ) { + jsonml[ 1 ] = {}; + for ( i in attrs ) { + jsonml[ 1 ][ i ] = attrs[ i ]; } + attrs = jsonml[ 1 ]; + } + // basic case + if ( typeof jsonml === "string" ) return jsonml; - }, - // There are two types of lists. Tight and loose. Tight lists have no whitespace - // between the items (and result in text just in the <li>) and loose lists, - // which have an empty line between list items, resulting in (one or more) - // paragraphs inside the <li>. - // - // There are all sorts weird edge cases about the original markdown.pl's - // handling of lists: - // - // * Nested lists are supposed to be indented by four chars per level. But - // if they aren't, you can get a nested list by indenting by less than - // four so long as the indent doesn't match an indent of an existing list - // item in the 'nest stack'. - // - // * The type of the list (bullet or number) is controlled just by the - // first item at the indent. Subsequent changes are ignored unless they - // are for nested lists - // - lists: (function( ) { - // Use a closure to hide a few variables. - var any_list = "[*+-]|\\d+\\.", - bullet_list = /[*+-]/, - number_list = /\d+\./, - // Capture leading indent as it matters for determining nested lists. - is_list_re = new RegExp( "^( {0,3})(" + any_list + ")[ \t]+" ), - indent_re = "(?: {0,3}\\t| {4})"; + // convert this node + switch ( jsonml[ 0 ] ) { + case "header": + jsonml[ 0 ] = "h" + jsonml[ 1 ].level; + delete jsonml[ 1 ].level; + break; + case "bulletlist": + jsonml[ 0 ] = "ul"; + break; + case "numberlist": + jsonml[ 0 ] = "ol"; + break; + case "listitem": + jsonml[ 0 ] = "li"; + break; + case "para": + jsonml[ 0 ] = "p"; + break; + case "markdown": + jsonml[ 0 ] = "html"; + if ( attrs ) + delete attrs.references; + break; + case "code_block": + jsonml[ 0 ] = "pre"; + i = attrs ? 2 : 1; + var code = [ "code" ]; + code.push.apply( code, jsonml.splice( i, jsonml.length - i ) ); + jsonml[ i ] = code; + break; + case "inlinecode": + jsonml[ 0 ] = "code"; + break; + case "img": + jsonml[ 1 ].src = jsonml[ 1 ].href; + delete jsonml[ 1 ].href; + break; + case "linebreak": + jsonml[ 0 ] = "br"; + break; + case "link": + jsonml[ 0 ] = "a"; + break; + case "link_ref": + jsonml[ 0 ] = "a"; - // TODO: Cache this regexp for certain depths. - // Create a regexp suitable for matching an li for a given stack depth - function regex_for_depth( depth ) { + // grab this ref and clean up the attribute node + var ref = references[ attrs.ref ]; - return new RegExp( - // m[1] = indent, m[2] = list_type - "(?:^(" + indent_re + "{0," + depth + "} {0,3})(" + any_list + ")\\s+)|" + - // m[3] = cont - "(^" + indent_re + "{0," + (depth-1) + "}[ ]{0,4})" - ); + // if the reference exists, make the link + if ( ref ) { + delete attrs.ref; + + // add in the href and title, if present + attrs.href = ref.href; + if ( ref.title ) + attrs.title = ref.title; + + // get rid of the unneeded original text + delete attrs.original; } - function expand_tab( input ) { - return input.replace( / {0,3}\t/g, " " ); + // the reference doesn't exist, so revert to plain text + else { + return attrs.original; } + break; + case "img_ref": + jsonml[ 0 ] = "img"; - // Add inline content `inline` to `li`. inline comes from processInline - // so is an array of content - function add(li, loose, inline, nl) { - if ( loose ) { - li.push( [ "para" ].concat(inline) ); - return; - } - // Hmmm, should this be any block level element or just paras? - var add_to = li[li.length -1] instanceof Array && li[li.length - 1][0] == "para" - ? li[li.length -1] - : li; + // grab this ref and clean up the attribute node + var ref = references[ attrs.ref ]; - // If there is already some content in this list, add the new line in - if ( nl && li.length > 1 ) inline.unshift(nl); + // if the reference exists, make the link + if ( ref ) { + delete attrs.ref; - for ( var i = 0; i < inline.length; i++ ) { - var what = inline[i], - is_str = typeof what == "string"; - if ( is_str && add_to.length > 1 && typeof add_to[add_to.length-1] == "string" ) { - add_to[ add_to.length-1 ] += what; - } - else { - add_to.push( what ); - } - } + // add in the href and title, if present + attrs.src = ref.href; + if ( ref.title ) + attrs.title = ref.title; + + // get rid of the unneeded original text + delete attrs.original; } + // the reference doesn't exist, so revert to plain text + else { + return attrs.original; + } + break; + } - // contained means have an indent greater than the current one. On - // *every* line in the block - function get_contained_blocks( depth, blocks ) { + // convert all the children + i = 1; - var re = new RegExp( "^(" + indent_re + "{" + depth + "}.*?\\n?)*$" ), - replace = new RegExp("^" + indent_re + "{" + depth + "}", "gm"), - ret = []; + // deal with the attribute node, if it exists + if ( attrs ) { + // if there are keys, skip over it + for ( var key in jsonml[ 1 ] ) { + i = 2; + break; + } + // if there aren't, remove it + if ( i === 1 ) + jsonml.splice( i, 1 ); + } - while ( blocks.length > 0 ) { - if ( re.exec( blocks[0] ) ) { - var b = blocks.shift(), - // Now remove that indent - x = b.replace( replace, ""); + for ( ; i < jsonml.length; ++i ) { + jsonml[ i ] = convert_tree_to_html( jsonml[ i ], references, options ); + } - ret.push( mk_block( x, b.trailing, b.lineNumber ) ); - } - else { - break; - } - } - return ret; - } + return jsonml; + } - // passed to stack.forEach to turn list items up the stack into paras - function paragraphify(s, i, stack) { - var list = s.list; - var last_li = list[list.length-1]; - if ( last_li[1] instanceof Array && last_li[1][0] == "para" ) { - return; + // merges adjacent text nodes into a single node + function merge_text_nodes( jsonml ) { + // skip the tag name and attribute hash + var i = extract_attr( jsonml ) ? 2 : 1; + + while ( i < jsonml.length ) { + // if it's a string check the next item too + if ( typeof jsonml[ i ] === "string" ) { + if ( i + 1 < jsonml.length && typeof jsonml[ i + 1 ] === "string" ) { + // merge the second string into the first and remove it + jsonml[ i ] += jsonml.splice( i + 1, 1 )[ 0 ]; } - if ( i + 1 == stack.length ) { - // Last stack frame - // Keep the same array, but replace the contents - last_li.push( ["para"].concat( last_li.splice(1, last_li.length - 1) ) ); - } else { - var sublist = last_li.pop(); - last_li.push( ["para"].concat( last_li.splice(1, last_li.length - 1) ), sublist ); + ++i; } } + // if it's not a string recurse + else { + merge_text_nodes( jsonml[ i ] ); + ++i; + } + } + }; - // The matcher function - return function( block, next ) { - var m = block.match( is_list_re ); - if ( !m ) return undefined; - function make_list( m ) { - var list = bullet_list.exec( m[2] ) - ? ["bulletlist"] - : ["numberlist"]; - stack.push( { list: list, indent: m[1] } ); - return list; - } + var DialectHelpers = {}; + DialectHelpers.inline_until_char = function( text, want ) { + var consumed = 0, + nodes = []; + while ( true ) { + if ( text.charAt( consumed ) === want ) { + // Found the character we were looking for + consumed++; + return [ consumed, nodes ]; + } - var stack = [], // Stack of lists for nesting. - list = make_list( m ), - last_li, - loose = false, - ret = [ stack[0].list ], - i; + if ( consumed >= text.length ) { + // No closing char found. Abort. + return null; + } - // Loop to search over block looking for inner block elements and loose lists - loose_search: - while ( true ) { - // Split into lines preserving new lines at end of line - var lines = block.split( /(?=\n)/ ); + var res = this.dialect.inline.__oneElement__.call(this, text.substr( consumed ) ); + consumed += res[ 0 ]; + // Add any returned nodes. + nodes.push.apply( nodes, res.slice( 1 ) ); + } + }; - // We have to grab all lines for a li and call processInline on them - // once as there are some inline things that can span lines. - var li_accumulate = ""; + // Helper function to make sub-classing a dialect easier + DialectHelpers.subclassDialect = function( d ) { + function Block() {} + Block.prototype = d.block; + function Inline() {} + Inline.prototype = d.inline; - // Loop over the lines in this block looking for tight lists. - tight_search: - for ( var line_no = 0; line_no < lines.length; line_no++ ) { - var nl = "", - l = lines[line_no].replace(/^\n/, function(n) { nl = n; return ""; }); + return { block: new Block(), inline: new Inline() }; + }; - // TODO: really should cache this - var line_re = regex_for_depth( stack.length ); - m = l.match( line_re ); - //print( "line:", uneval(l), "\nline match:", uneval(m) ); - // We have a list item - if ( m[1] !== undefined ) { - // Process the previous list item, if any - if ( li_accumulate.length ) { - add( last_li, loose, this.processInline( li_accumulate ), nl ); - // Loose mode will have been dealt with. Reset it - loose = false; - li_accumulate = ""; - } - m[1] = expand_tab( m[1] ); - var wanted_depth = Math.floor(m[1].length/4)+1; - //print( "want:", wanted_depth, "stack:", stack.length); - if ( wanted_depth > stack.length ) { - // Deep enough for a nested list outright - //print ( "new nested list" ); - list = make_list( m ); - last_li.push( list ); - last_li = list[1] = [ "listitem" ]; - } - else { - // We aren't deep enough to be strictly a new level. This is - // where Md.pl goes nuts. If the indent matches a level in the - // stack, put it there, else put it one deeper then the - // wanted_depth deserves. - var found = false; - for ( i = 0; i < stack.length; i++ ) { - if ( stack[ i ].indent != m[1] ) continue; - list = stack[ i ].list; - stack.splice( i+1, stack.length - (i+1) ); - found = true; - break; - } + var forEach = MarkdownHelpers.forEach, + extract_attr = MarkdownHelpers.extract_attr, + mk_block = MarkdownHelpers.mk_block, + isEmpty = MarkdownHelpers.isEmpty, + inline_until_char = DialectHelpers.inline_until_char; - if (!found) { - //print("not found. l:", uneval(l)); - wanted_depth++; - if ( wanted_depth <= stack.length ) { - stack.splice(wanted_depth, stack.length - wanted_depth); - //print("Desired depth now", wanted_depth, "stack:", stack.length); - list = stack[wanted_depth-1].list; - //print("list:", uneval(list) ); - } - else { - //print ("made new stack for messy indent"); - list = make_list(m); - last_li.push(list); - } - } + /** + * Gruber dialect + * + * The default dialect that follows the rules set out by John Gruber's + * markdown.pl as closely as possible. Well actually we follow the behaviour of + * that script which in some places is not exactly what the syntax web page + * says. + **/ + var Gruber = { + block: { + atxHeader: function atxHeader( block, next ) { + var m = block.match( /^(#{1,6})\s*(.*?)\s*#*\s*(?:\n|$)/ ); - //print( uneval(list), "last", list === stack[stack.length-1].list ); - last_li = [ "listitem" ]; - list.push(last_li); - } // end depth of shenegains - nl = ""; - } + if ( !m ) + return undefined; - // Add content - if ( l.length > m[0].length ) { - li_accumulate += nl + l.substr( m[0].length ); - } - } // tight_search + var header = [ "header", { level: m[ 1 ].length } ]; + Array.prototype.push.apply(header, this.processInline(m[ 2 ])); - if ( li_accumulate.length ) { - add( last_li, loose, this.processInline( li_accumulate ), nl ); - // Loose mode will have been dealt with. Reset it - loose = false; - li_accumulate = ""; - } + if ( m[0].length < block.length ) + next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) ); - // Look at the next block - we might have a loose list. Or an extra - // paragraph for the current li - var contained = get_contained_blocks( stack.length, next ); + return [ header ]; + }, - // Deal with code blocks or properly nested lists - if ( contained.length > 0 ) { - // Make sure all listitems up the stack are paragraphs - forEach( stack, paragraphify, this); + setextHeader: function setextHeader( block, next ) { + var m = block.match( /^(.*)\n([-=])\2\2+(?:\n|$)/ ); - last_li.push.apply( last_li, this.toTree( contained, [] ) ); - } + if ( !m ) + return undefined; - var next_block = next[0] && next[0].valueOf() || ""; + var level = ( m[ 2 ] === "=" ) ? 1 : 2, + header = [ "header", { level : level }, m[ 1 ] ]; - if ( next_block.match(is_list_re) || next_block.match( /^ / ) ) { - block = next.shift(); + if ( m[0].length < block.length ) + next.unshift( mk_block( block.substr( m[0].length ), block.trailing, block.lineNumber + 2 ) ); - // Check for an HR following a list: features/lists/hr_abutting - var hr = this.dialect.block.horizRule( block, next ); + return [ header ]; + }, - if ( hr ) { - ret.push.apply(ret, hr); - break; - } + code: function code( block, next ) { + // | Foo + // |bar + // should be a code block followed by a paragraph. Fun + // + // There might also be adjacent code block to merge. - // Make sure all listitems up the stack are paragraphs - forEach( stack, paragraphify, this); + var ret = [], + re = /^(?: {0,3}\t| {4})(.*)\n?/; - loose = true; - continue loose_search; - } - break; - } // loose_search + // 4 spaces + content + if ( !block.match( re ) ) + return undefined; - return ret; - }; - })(), + block_search: + do { + // Now pull out the rest of the lines + var b = this.loop_re_over_block( + re, block.valueOf(), function( m ) { ret.push( m[1] ); } ); - blockquote: function blockquote( block, next ) { - if ( !block.match( /^>/m ) ) - return undefined; + if ( b.length ) { + // Case alluded to in first comment. push it back on as a new block + next.unshift( mk_block(b, block.trailing) ); + break block_search; + } + else if ( next.length ) { + // Check the next block - it might be code too + if ( !next[0].match( re ) ) + break block_search; - var jsonml = []; + // Pull how how many blanks lines follow - minus two to account for .join + ret.push ( block.trailing.replace(/[^\n]/g, "").substring(2) ); - // separate out the leading abutting block, if any - if ( block[ 0 ] != ">" ) { - var lines = block.split( /\n/ ), - prev = []; + block = next.shift(); + } + else { + break block_search; + } + } while ( true ); - // keep shifting lines until you find a crotchet - while ( lines.length && lines[ 0 ][ 0 ] != ">" ) { - prev.push( lines.shift() ); - } + return [ [ "code_block", ret.join("\n") ] ]; + }, - // reassemble! - block = lines.join( "\n" ); - jsonml.push.apply( jsonml, this.processBlock( prev.join( "\n" ), [] ) ); - } + horizRule: function horizRule( block, next ) { + // this needs to find any hr in the block to handle abutting blocks + var m = block.match( /^(?:([\s\S]*?)\n)?[ \t]*([-_*])(?:[ \t]*\2){2,}[ \t]*(?:\n([\s\S]*))?$/ ); - // if the next block is also a blockquote merge it in - while ( next.length && next[ 0 ][ 0 ] == ">" ) { - var b = next.shift(); - block = new String(block + block.trailing + b); - block.trailing = b.trailing; - } + if ( !m ) + return undefined; - // Strip off the leading "> " and re-process as a block. - var input = block.replace( /^> ?/gm, "" ), - old_tree = this.tree, - processedBlock = this.toTree( input, [ "blockquote" ] ), - attr = extract_attr( processedBlock ); + var jsonml = [ [ "hr" ] ]; - // If any link references were found get rid of them - if ( attr && attr.references ) { - delete attr.references; - // And then remove the attribute object if it's empty - if ( isEmpty( attr ) ) { - processedBlock.splice( 1, 1 ); + // if there's a leading abutting block, process it + if ( m[ 1 ] ) { + var contained = mk_block( m[ 1 ], "", block.lineNumber ); + jsonml.unshift.apply( jsonml, this.toTree( contained, [] ) ); } - } - jsonml.push( processedBlock ); - return jsonml; - }, + // if there's a trailing abutting block, stick it into next + if ( m[ 3 ] ) + next.unshift( mk_block( m[ 3 ], block.trailing, block.lineNumber + 1 ) ); - referenceDefn: function referenceDefn( block, next) { - var re = /^\s*\[(.*?)\]:\s*(\S+)(?:\s+(?:(['"])(.*?)\3|\((.*?)\)))?\n?/; - // interesting matches are [ , ref_id, url, , title, title ] + return jsonml; + }, - if ( !block.match(re) ) - return undefined; + // There are two types of lists. Tight and loose. Tight lists have no whitespace + // between the items (and result in text just in the <li>) and loose lists, + // which have an empty line between list items, resulting in (one or more) + // paragraphs inside the <li>. + // + // There are all sorts weird edge cases about the original markdown.pl's + // handling of lists: + // + // * Nested lists are supposed to be indented by four chars per level. But + // if they aren't, you can get a nested list by indenting by less than + // four so long as the indent doesn't match an indent of an existing list + // item in the 'nest stack'. + // + // * The type of the list (bullet or number) is controlled just by the + // first item at the indent. Subsequent changes are ignored unless they + // are for nested lists + // + lists: (function( ) { + // Use a closure to hide a few variables. + var any_list = "[*+-]|\\d+\\.", + bullet_list = /[*+-]/, + // Capture leading indent as it matters for determining nested lists. + is_list_re = new RegExp( "^( {0,3})(" + any_list + ")[ \t]+" ), + indent_re = "(?: {0,3}\\t| {4})"; - // make an attribute node if it doesn't exist - if ( !extract_attr( this.tree ) ) { - this.tree.splice( 1, 0, {} ); - } + // TODO: Cache this regexp for certain depths. + // Create a regexp suitable for matching an li for a given stack depth + function regex_for_depth( depth ) { - var attrs = extract_attr( this.tree ); + return new RegExp( + // m[1] = indent, m[2] = list_type + "(?:^(" + indent_re + "{0," + depth + "} {0,3})(" + any_list + ")\\s+)|" + + // m[3] = cont + "(^" + indent_re + "{0," + (depth-1) + "}[ ]{0,4})" + ); + } + function expand_tab( input ) { + return input.replace( / {0,3}\t/g, " " ); + } - // make a references hash if it doesn't exist - if ( attrs.references === undefined ) { - attrs.references = {}; - } + // Add inline content `inline` to `li`. inline comes from processInline + // so is an array of content + function add(li, loose, inline, nl) { + if ( loose ) { + li.push( [ "para" ].concat(inline) ); + return; + } + // Hmmm, should this be any block level element or just paras? + var add_to = li[li.length -1] instanceof Array && li[li.length - 1][0] === "para" + ? li[li.length -1] + : li; - var b = this.loop_re_over_block(re, block, function( m ) { + // If there is already some content in this list, add the new line in + if ( nl && li.length > 1 ) + inline.unshift(nl); - if ( m[2] && m[2][0] == "<" && m[2][m[2].length-1] == ">" ) - m[2] = m[2].substring( 1, m[2].length - 1 ); + for ( var i = 0; i < inline.length; i++ ) { + var what = inline[i], + is_str = typeof what === "string"; + if ( is_str && add_to.length > 1 && typeof add_to[add_to.length-1] === "string" ) + add_to[ add_to.length-1 ] += what; + else + add_to.push( what ); + } + } - var ref = attrs.references[ m[1].toLowerCase() ] = { - href: m[2] - }; + // contained means have an indent greater than the current one. On + // *every* line in the block + function get_contained_blocks( depth, blocks ) { - if ( m[4] !== undefined ) - ref.title = m[4]; - else if ( m[5] !== undefined ) - ref.title = m[5]; + var re = new RegExp( "^(" + indent_re + "{" + depth + "}.*?\\n?)*$" ), + replace = new RegExp("^" + indent_re + "{" + depth + "}", "gm"), + ret = []; - } ); + while ( blocks.length > 0 ) { + if ( re.exec( blocks[0] ) ) { + var b = blocks.shift(), + // Now remove that indent + x = b.replace( replace, ""); - if ( b.length ) - next.unshift( mk_block( b, block.trailing ) ); + ret.push( mk_block( x, b.trailing, b.lineNumber ) ); + } + else + break; + } + return ret; + } - return []; - }, + // passed to stack.forEach to turn list items up the stack into paras + function paragraphify(s, i, stack) { + var list = s.list; + var last_li = list[list.length-1]; - para: function para( block, next ) { - // everything's a para! - return [ ["para"].concat( this.processInline( block ) ) ]; - } - } -}; + if ( last_li[1] instanceof Array && last_li[1][0] === "para" ) + return; + if ( i + 1 === stack.length ) { + // Last stack frame + // Keep the same array, but replace the contents + last_li.push( ["para"].concat( last_li.splice(1, last_li.length - 1) ) ); + } + else { + var sublist = last_li.pop(); + last_li.push( ["para"].concat( last_li.splice(1, last_li.length - 1) ), sublist ); + } + } -Markdown.dialects.Gruber.inline = { + // The matcher function + return function( block, next ) { + var m = block.match( is_list_re ); + if ( !m ) + return undefined; - __oneElement__: function oneElement( text, patterns_or_re, previous_nodes ) { - var m, - res, - lastIndex = 0; + function make_list( m ) { + var list = bullet_list.exec( m[2] ) + ? ["bulletlist"] + : ["numberlist"]; - patterns_or_re = patterns_or_re || this.dialect.inline.__patterns__; - var re = new RegExp( "([\\s\\S]*?)(" + (patterns_or_re.source || patterns_or_re) + ")" ); + stack.push( { list: list, indent: m[1] } ); + return list; + } - m = re.exec( text ); - if (!m) { - // Just boring text - return [ text.length, text ]; - } - else if ( m[1] ) { - // Some un-interesting text matched. Return that first - return [ m[1].length, m[1] ]; - } - var res; - if ( m[2] in this.dialect.inline ) { - res = this.dialect.inline[ m[2] ].call( - this, - text.substr( m.index ), m, previous_nodes || [] ); - } - // Default for now to make dev easier. just slurp special and output it. - res = res || [ m[2].length, m[2] ]; - return res; - }, + var stack = [], // Stack of lists for nesting. + list = make_list( m ), + last_li, + loose = false, + ret = [ stack[0].list ], + i; - __call__: function inline( text, patterns ) { + // Loop to search over block looking for inner block elements and loose lists + loose_search: + while ( true ) { + // Split into lines preserving new lines at end of line + var lines = block.split( /(?=\n)/ ); - var out = [], - res; + // We have to grab all lines for a li and call processInline on them + // once as there are some inline things that can span lines. + var li_accumulate = "", nl = ""; - function add(x) { - //D:self.debug(" adding output", uneval(x)); - if ( typeof x == "string" && typeof out[out.length-1] == "string" ) - out[ out.length-1 ] += x; - else - out.push(x); - } + // Loop over the lines in this block looking for tight lists. + tight_search: + for ( var line_no = 0; line_no < lines.length; line_no++ ) { + nl = ""; + var l = lines[line_no].replace(/^\n/, function(n) { nl = n; return ""; }); - while ( text.length > 0 ) { - res = this.dialect.inline.__oneElement__.call(this, text, patterns, out ); - text = text.substr( res.shift() ); - forEach(res, add ) - } - return out; - }, + // TODO: really should cache this + var line_re = regex_for_depth( stack.length ); - // These characters are intersting elsewhere, so have rules for them so that - // chunks of plain text blocks don't include them - "]": function () {}, - "}": function () {}, + m = l.match( line_re ); + //print( "line:", uneval(l), "\nline match:", uneval(m) ); - "\\": function escaped( text ) { - // [ length of input processed, node/children to add... ] - // Only esacape: \ ` * _ { } [ ] ( ) # * + - . ! - if ( text.match( /^\\[\\`\*_{}\[\]()#\+.!\-]/ ) ) - return [ 2, text.charAt( 1 ) ]; - else - // Not an esacpe - return [ 1, "\\" ]; - }, + // We have a list item + if ( m[1] !== undefined ) { + // Process the previous list item, if any + if ( li_accumulate.length ) { + add( last_li, loose, this.processInline( li_accumulate ), nl ); + // Loose mode will have been dealt with. Reset it + loose = false; + li_accumulate = ""; + } - "![": function image( text ) { + m[1] = expand_tab( m[1] ); + var wanted_depth = Math.floor(m[1].length/4)+1; + //print( "want:", wanted_depth, "stack:", stack.length); + if ( wanted_depth > stack.length ) { + // Deep enough for a nested list outright + //print ( "new nested list" ); + list = make_list( m ); + last_li.push( list ); + last_li = list[1] = [ "listitem" ]; + } + else { + // We aren't deep enough to be strictly a new level. This is + // where Md.pl goes nuts. If the indent matches a level in the + // stack, put it there, else put it one deeper then the + // wanted_depth deserves. + var found = false; + for ( i = 0; i < stack.length; i++ ) { + if ( stack[ i ].indent !== m[1] ) + continue; - // Unlike images, alt text is plain text only. no other elements are - // allowed in there + list = stack[ i ].list; + stack.splice( i+1, stack.length - (i+1) ); + found = true; + break; + } - // ![Alt text](/path/to/img.jpg "Optional title") - // 1 2 3 4 <--- captures - var m = text.match( /^!\[(.*?)\][ \t]*\([ \t]*([^")]*?)(?:[ \t]+(["'])(.*?)\3)?[ \t]*\)/ ); + if (!found) { + //print("not found. l:", uneval(l)); + wanted_depth++; + if ( wanted_depth <= stack.length ) { + stack.splice(wanted_depth, stack.length - wanted_depth); + //print("Desired depth now", wanted_depth, "stack:", stack.length); + list = stack[wanted_depth-1].list; + //print("list:", uneval(list) ); + } + else { + //print ("made new stack for messy indent"); + list = make_list(m); + last_li.push(list); + } + } - if ( m ) { - if ( m[2] && m[2][0] == "<" && m[2][m[2].length-1] == ">" ) - m[2] = m[2].substring( 1, m[2].length - 1 ); + //print( uneval(list), "last", list === stack[stack.length-1].list ); + last_li = [ "listitem" ]; + list.push(last_li); + } // end depth of shenegains + nl = ""; + } - m[2] = this.dialect.inline.__call__.call( this, m[2], /\\/ )[0]; + // Add content + if ( l.length > m[0].length ) + li_accumulate += nl + l.substr( m[0].length ); + } // tight_search - var attrs = { alt: m[1], href: m[2] || "" }; - if ( m[4] !== undefined) - attrs.title = m[4]; + if ( li_accumulate.length ) { + add( last_li, loose, this.processInline( li_accumulate ), nl ); + // Loose mode will have been dealt with. Reset it + loose = false; + li_accumulate = ""; + } - return [ m[0].length, [ "img", attrs ] ]; - } + // Look at the next block - we might have a loose list. Or an extra + // paragraph for the current li + var contained = get_contained_blocks( stack.length, next ); - // ![Alt text][id] - m = text.match( /^!\[(.*?)\][ \t]*\[(.*?)\]/ ); + // Deal with code blocks or properly nested lists + if ( contained.length > 0 ) { + // Make sure all listitems up the stack are paragraphs + forEach( stack, paragraphify, this); - if ( m ) { - // We can't check if the reference is known here as it likely wont be - // found till after. Check it in md tree->hmtl tree conversion - return [ m[0].length, [ "img_ref", { alt: m[1], ref: m[2].toLowerCase(), original: m[0] } ] ]; - } + last_li.push.apply( last_li, this.toTree( contained, [] ) ); + } - // Just consume the '![' - return [ 2, "![" ]; - }, + var next_block = next[0] && next[0].valueOf() || ""; - "[": function link( text ) { + if ( next_block.match(is_list_re) || next_block.match( /^ / ) ) { + block = next.shift(); - var orig = String(text); - // Inline content is possible inside `link text` - var res = Markdown.DialectHelpers.inline_until_char.call( this, text.substr(1), "]" ); + // Check for an HR following a list: features/lists/hr_abutting + var hr = this.dialect.block.horizRule( block, next ); - // No closing ']' found. Just consume the [ - if ( !res ) return [ 1, "[" ]; + if ( hr ) { + ret.push.apply(ret, hr); + break; + } - var consumed = 1 + res[ 0 ], - children = res[ 1 ], - link, - attrs; + // Make sure all listitems up the stack are paragraphs + forEach( stack, paragraphify, this); - // At this point the first [...] has been parsed. See what follows to find - // out which kind of link we are (reference or direct url) - text = text.substr( consumed ); + loose = true; + continue loose_search; + } + break; + } // loose_search - // [link text](/path/to/img.jpg "Optional title") - // 1 2 3 <--- captures - // This will capture up to the last paren in the block. We then pull - // back based on if there a matching ones in the url - // ([here](/url/(test)) - // The parens have to be balanced - var m = text.match( /^\s*\([ \t]*([^"']*)(?:[ \t]+(["'])(.*?)\2)?[ \t]*\)/ ); - if ( m ) { - var url = m[1]; - consumed += m[0].length; + return ret; + }; + })(), - if ( url && url[0] == "<" && url[url.length-1] == ">" ) - url = url.substring( 1, url.length - 1 ); + blockquote: function blockquote( block, next ) { + if ( !block.match( /^>/m ) ) + return undefined; - // If there is a title we don't have to worry about parens in the url - if ( !m[3] ) { - var open_parens = 1; // One open that isn't in the capture - for ( var len = 0; len < url.length; len++ ) { - switch ( url[len] ) { - case "(": - open_parens++; - break; - case ")": - if ( --open_parens == 0) { - consumed -= url.length - len; - url = url.substring(0, len); - } - break; - } + var jsonml = []; + + // separate out the leading abutting block, if any. I.e. in this case: + // + // a + // > b + // + if ( block[ 0 ] !== ">" ) { + var lines = block.split( /\n/ ), + prev = [], + line_no = block.lineNumber; + + // keep shifting lines until you find a crotchet + while ( lines.length && lines[ 0 ][ 0 ] !== ">" ) { + prev.push( lines.shift() ); + line_no++; } + + var abutting = mk_block( prev.join( "\n" ), "\n", block.lineNumber ); + jsonml.push.apply( jsonml, this.processBlock( abutting, [] ) ); + // reassemble new block of just block quotes! + block = mk_block( lines.join( "\n" ), block.trailing, line_no ); } - // Process escapes only - url = this.dialect.inline.__call__.call( this, url, /\\/ )[0]; - attrs = { href: url || "" }; - if ( m[3] !== undefined) - attrs.title = m[3]; + // if the next block is also a blockquote merge it in + while ( next.length && next[ 0 ][ 0 ] === ">" ) { + var b = next.shift(); + block = mk_block( block + block.trailing + b, b.trailing, block.lineNumber ); + } - link = [ "link", attrs ].concat( children ); - return [ consumed, link ]; - } + // Strip off the leading "> " and re-process as a block. + var input = block.replace( /^> ?/gm, "" ), + old_tree = this.tree, + processedBlock = this.toTree( input, [ "blockquote" ] ), + attr = extract_attr( processedBlock ); - // [Alt text][id] - // [Alt text] [id] - m = text.match( /^\s*\[(.*?)\]/ ); + // If any link references were found get rid of them + if ( attr && attr.references ) { + delete attr.references; + // And then remove the attribute object if it's empty + if ( isEmpty( attr ) ) + processedBlock.splice( 1, 1 ); + } - if ( m ) { + jsonml.push( processedBlock ); + return jsonml; + }, - consumed += m[ 0 ].length; + referenceDefn: function referenceDefn( block, next) { + var re = /^\s*\[(.*?)\]:\s*(\S+)(?:\s+(?:(['"])(.*?)\3|\((.*?)\)))?\n?/; + // interesting matches are [ , ref_id, url, , title, title ] - // [links][] uses links as its reference - attrs = { ref: ( m[ 1 ] || String(children) ).toLowerCase(), original: orig.substr( 0, consumed ) }; + if ( !block.match(re) ) + return undefined; - link = [ "link_ref", attrs ].concat( children ); + // make an attribute node if it doesn't exist + if ( !extract_attr( this.tree ) ) + this.tree.splice( 1, 0, {} ); - // We can't check if the reference is known here as it likely wont be - // found till after. Check it in md tree->hmtl tree conversion. - // Store the original so that conversion can revert if the ref isn't found. - return [ consumed, link ]; - } + var attrs = extract_attr( this.tree ); - // [id] - // Only if id is plain (no formatting.) - if ( children.length == 1 && typeof children[0] == "string" ) { + // make a references hash if it doesn't exist + if ( attrs.references === undefined ) + attrs.references = {}; - attrs = { ref: children[0].toLowerCase(), original: orig.substr( 0, consumed ) }; - link = [ "link_ref", attrs, children[0] ]; - return [ consumed, link ]; - } + var b = this.loop_re_over_block(re, block, function( m ) { - // Just consume the "[" - return [ 1, "[" ]; - }, + if ( m[2] && m[2][0] === "<" && m[2][m[2].length-1] === ">" ) + m[2] = m[2].substring( 1, m[2].length - 1 ); + var ref = attrs.references[ m[1].toLowerCase() ] = { + href: m[2] + }; - "<": function autoLink( text ) { - var m; + if ( m[4] !== undefined ) + ref.title = m[4]; + else if ( m[5] !== undefined ) + ref.title = m[5]; - if ( ( m = text.match( /^<(?:((https?|ftp|mailto):[^>]+)|(.*?@.*?\.[a-zA-Z]+))>/ ) ) != null ) { - if ( m[3] ) { - return [ m[0].length, [ "link", { href: "mailto:" + m[3] }, m[3] ] ]; + } ); - } - else if ( m[2] == "mailto" ) { - return [ m[0].length, [ "link", { href: m[1] }, m[1].substr("mailto:".length ) ] ]; - } - else - return [ m[0].length, [ "link", { href: m[1] }, m[1] ] ]; - } + if ( b.length ) + next.unshift( mk_block( b, block.trailing ) ); - return [ 1, "<" ]; - }, + return []; + }, - "`": function inlineCode( text ) { - // Inline code block. as many backticks as you like to start it - // Always skip over the opening ticks. - var m = text.match( /(`+)(([\s\S]*?)\1)/ ); - - if ( m && m[2] ) - return [ m[1].length + m[2].length, [ "inlinecode", m[3] ] ]; - else { - // TODO: No matching end code found - warn! - return [ 1, "`" ]; + para: function para( block ) { + // everything's a para! + return [ ["para"].concat( this.processInline( block ) ) ]; } }, - " \n": function lineBreak( text ) { - return [ 3, [ "linebreak" ] ]; - } + inline: { -}; + __oneElement__: function oneElement( text, patterns_or_re, previous_nodes ) { + var m, + res; -// Meta Helper/generator method for em and strong handling -function strong_em( tag, md ) { + patterns_or_re = patterns_or_re || this.dialect.inline.__patterns__; + var re = new RegExp( "([\\s\\S]*?)(" + (patterns_or_re.source || patterns_or_re) + ")" ); - var state_slot = tag + "_state", - other_slot = tag == "strong" ? "em_state" : "strong_state"; + m = re.exec( text ); + if (!m) { + // Just boring text + return [ text.length, text ]; + } + else if ( m[1] ) { + // Some un-interesting text matched. Return that first + return [ m[1].length, m[1] ]; + } - function CloseTag(len) { - this.len_after = len; - this.name = "close_" + md; - } + var res; + if ( m[2] in this.dialect.inline ) { + res = this.dialect.inline[ m[2] ].call( + this, + text.substr( m.index ), m, previous_nodes || [] ); + } + // Default for now to make dev easier. just slurp special and output it. + res = res || [ m[2].length, m[2] ]; + return res; + }, - return function ( text, orig_match ) { + __call__: function inline( text, patterns ) { - if ( this[state_slot][0] == md ) { - // Most recent em is of this type - //D:this.debug("closing", md); - this[state_slot].shift(); + var out = [], + res; - // "Consume" everything to go back to the recrusion in the else-block below - return[ text.length, new CloseTag(text.length-md.length) ]; - } - else { - // Store a clone of the em/strong states - var other = this[other_slot].slice(), - state = this[state_slot].slice(); + function add(x) { + //D:self.debug(" adding output", uneval(x)); + if ( typeof x === "string" && typeof out[out.length-1] === "string" ) + out[ out.length-1 ] += x; + else + out.push(x); + } - this[state_slot].unshift(md); + while ( text.length > 0 ) { + res = this.dialect.inline.__oneElement__.call(this, text, patterns, out ); + text = text.substr( res.shift() ); + forEach(res, add ); + } - //D:this.debug_indent += " "; + return out; + }, - // Recurse - var res = this.processInline( text.substr( md.length ) ); - //D:this.debug_indent = this.debug_indent.substr(2); + // These characters are intersting elsewhere, so have rules for them so that + // chunks of plain text blocks don't include them + "]": function () {}, + "}": function () {}, - var last = res[res.length - 1]; + __escape__ : /^\\[\\`\*_{}\[\]()#\+.!\-]/, - //D:this.debug("processInline from", tag + ": ", uneval( res ) ); + "\\": function escaped( text ) { + // [ length of input processed, node/children to add... ] + // Only esacape: \ ` * _ { } [ ] ( ) # * + - . ! + if ( this.dialect.inline.__escape__.exec( text ) ) + return [ 2, text.charAt( 1 ) ]; + else + // Not an esacpe + return [ 1, "\\" ]; + }, - var check = this[state_slot].shift(); - if ( last instanceof CloseTag ) { - res.pop(); - // We matched! Huzzah. - var consumed = text.length - last.len_after; - return [ consumed, [ tag ].concat(res) ]; - } - else { - // Restore the state of the other kind. We might have mistakenly closed it. - this[other_slot] = other; - this[state_slot] = state; + "![": function image( text ) { - // We can't reuse the processed result as it could have wrong parsing contexts in it. - return [ md.length, md ]; - } - } - }; // End returned function -} + // Unlike images, alt text is plain text only. no other elements are + // allowed in there -Markdown.dialects.Gruber.inline["**"] = strong_em("strong", "**"); -Markdown.dialects.Gruber.inline["__"] = strong_em("strong", "__"); -Markdown.dialects.Gruber.inline["*"] = strong_em("em", "*"); -Markdown.dialects.Gruber.inline["_"] = strong_em("em", "_"); + // ![Alt text](/path/to/img.jpg "Optional title") + // 1 2 3 4 <--- captures + var m = text.match( /^!\[(.*?)\][ \t]*\([ \t]*([^")]*?)(?:[ \t]+(["'])(.*?)\3)?[ \t]*\)/ ); + if ( m ) { + if ( m[2] && m[2][0] === "<" && m[2][m[2].length-1] === ">" ) + m[2] = m[2].substring( 1, m[2].length - 1 ); -// Build default order from insertion order. -Markdown.buildBlockOrder = function(d) { - var ord = []; - for ( var i in d ) { - if ( i == "__order__" || i == "__call__" ) continue; - ord.push( i ); - } - d.__order__ = ord; -}; + m[2] = this.dialect.inline.__call__.call( this, m[2], /\\/ )[0]; -// Build patterns for inline matcher -Markdown.buildInlinePatterns = function(d) { - var patterns = []; + var attrs = { alt: m[1], href: m[2] || "" }; + if ( m[4] !== undefined) + attrs.title = m[4]; - for ( var i in d ) { - // __foo__ is reserved and not a pattern - if ( i.match( /^__.*__$/) ) continue; - var l = i.replace( /([\\.*+?|()\[\]{}])/g, "\\$1" ) - .replace( /\n/, "\\n" ); - patterns.push( i.length == 1 ? l : "(?:" + l + ")" ); - } + return [ m[0].length, [ "img", attrs ] ]; + } - patterns = patterns.join("|"); - d.__patterns__ = patterns; - //print("patterns:", uneval( patterns ) ); + // ![Alt text][id] + m = text.match( /^!\[(.*?)\][ \t]*\[(.*?)\]/ ); - var fn = d.__call__; - d.__call__ = function(text, pattern) { - if ( pattern != undefined ) { - return fn.call(this, text, pattern); - } - else - { - return fn.call(this, text, patterns); - } - }; -}; + if ( m ) { + // We can't check if the reference is known here as it likely wont be + // found till after. Check it in md tree->hmtl tree conversion + return [ m[0].length, [ "img_ref", { alt: m[1], ref: m[2].toLowerCase(), original: m[0] } ] ]; + } -Markdown.DialectHelpers = {}; -Markdown.DialectHelpers.inline_until_char = function( text, want ) { - var consumed = 0, - nodes = []; + // Just consume the '![' + return [ 2, "![" ]; + }, - while ( true ) { - if ( text.charAt( consumed ) == want ) { - // Found the character we were looking for - consumed++; - return [ consumed, nodes ]; - } + "[": function link( text ) { - if ( consumed >= text.length ) { - // No closing char found. Abort. - return null; - } + var orig = String(text); + // Inline content is possible inside `link text` + var res = inline_until_char.call( this, text.substr(1), "]" ); - var res = this.dialect.inline.__oneElement__.call(this, text.substr( consumed ) ); - consumed += res[ 0 ]; - // Add any returned nodes. - nodes.push.apply( nodes, res.slice( 1 ) ); - } -} + // No closing ']' found. Just consume the [ + if ( !res ) + return [ 1, "[" ]; -// Helper function to make sub-classing a dialect easier -Markdown.subclassDialect = function( d ) { - function Block() {} - Block.prototype = d.block; - function Inline() {} - Inline.prototype = d.inline; + var consumed = 1 + res[ 0 ], + children = res[ 1 ], + link, + attrs; - return { block: new Block(), inline: new Inline() }; -}; + // At this point the first [...] has been parsed. See what follows to find + // out which kind of link we are (reference or direct url) + text = text.substr( consumed ); -Markdown.buildBlockOrder ( Markdown.dialects.Gruber.block ); -Markdown.buildInlinePatterns( Markdown.dialects.Gruber.inline ); + // [link text](/path/to/img.jpg "Optional title") + // 1 2 3 <--- captures + // This will capture up to the last paren in the block. We then pull + // back based on if there a matching ones in the url + // ([here](/url/(test)) + // The parens have to be balanced + var m = text.match( /^\s*\([ \t]*([^"']*)(?:[ \t]+(["'])(.*?)\2)?[ \t]*\)/ ); + if ( m ) { + var url = m[1]; + consumed += m[0].length; -Markdown.dialects.Maruku = Markdown.subclassDialect( Markdown.dialects.Gruber ); + if ( url && url[0] === "<" && url[url.length-1] === ">" ) + url = url.substring( 1, url.length - 1 ); -Markdown.dialects.Maruku.processMetaHash = function processMetaHash( meta_string ) { - var meta = split_meta_hash( meta_string ), - attr = {}; + // If there is a title we don't have to worry about parens in the url + if ( !m[3] ) { + var open_parens = 1; // One open that isn't in the capture + for ( var len = 0; len < url.length; len++ ) { + switch ( url[len] ) { + case "(": + open_parens++; + break; + case ")": + if ( --open_parens === 0) { + consumed -= url.length - len; + url = url.substring(0, len); + } + break; + } + } + } - for ( var i = 0; i < meta.length; ++i ) { - // id: #foo - if ( /^#/.test( meta[ i ] ) ) { - attr.id = meta[ i ].substring( 1 ); - } - // class: .foo - else if ( /^\./.test( meta[ i ] ) ) { - // if class already exists, append the new one - if ( attr["class"] ) { - attr["class"] = attr["class"] + meta[ i ].replace( /./, " " ); - } - else { - attr["class"] = meta[ i ].substring( 1 ); - } - } - // attribute: foo=bar - else if ( /\=/.test( meta[ i ] ) ) { - var s = meta[ i ].split( /\=/ ); - attr[ s[ 0 ] ] = s[ 1 ]; - } - } + // Process escapes only + url = this.dialect.inline.__call__.call( this, url, /\\/ )[0]; - return attr; -} + attrs = { href: url || "" }; + if ( m[3] !== undefined) + attrs.title = m[3]; -function split_meta_hash( meta_string ) { - var meta = meta_string.split( "" ), - parts = [ "" ], - in_quotes = false; - - while ( meta.length ) { - var letter = meta.shift(); - switch ( letter ) { - case " " : - // if we're in a quoted section, keep it - if ( in_quotes ) { - parts[ parts.length - 1 ] += letter; + link = [ "link", attrs ].concat( children ); + return [ consumed, link ]; } - // otherwise make a new part - else { - parts.push( "" ); - } - break; - case "'" : - case '"' : - // reverse the quotes and move straight on - in_quotes = !in_quotes; - break; - case "\\" : - // shift off the next letter to be used straight away. - // it was escaped so we'll keep it whatever it is - letter = meta.shift(); - default : - parts[ parts.length - 1 ] += letter; - break; - } - } - return parts; -} + // [Alt text][id] + // [Alt text] [id] + m = text.match( /^\s*\[(.*?)\]/ ); -Markdown.dialects.Maruku.block.document_meta = function document_meta( block, next ) { - // we're only interested in the first block - if ( block.lineNumber > 1 ) return undefined; + if ( m ) { - // document_meta blocks consist of one or more lines of `Key: Value\n` - if ( ! block.match( /^(?:\w+:.*\n)*\w+:.*$/ ) ) return undefined; + consumed += m[ 0 ].length; - // make an attribute node if it doesn't exist - if ( !extract_attr( this.tree ) ) { - this.tree.splice( 1, 0, {} ); - } + // [links][] uses links as its reference + attrs = { ref: ( m[ 1 ] || String(children) ).toLowerCase(), original: orig.substr( 0, consumed ) }; - var pairs = block.split( /\n/ ); - for ( p in pairs ) { - var m = pairs[ p ].match( /(\w+):\s*(.*)$/ ), - key = m[ 1 ].toLowerCase(), - value = m[ 2 ]; + link = [ "link_ref", attrs ].concat( children ); - this.tree[ 1 ][ key ] = value; - } + // We can't check if the reference is known here as it likely wont be + // found till after. Check it in md tree->hmtl tree conversion. + // Store the original so that conversion can revert if the ref isn't found. + return [ consumed, link ]; + } - // document_meta produces no content! - return []; -}; + // [id] + // Only if id is plain (no formatting.) + if ( children.length === 1 && typeof children[0] === "string" ) { -Markdown.dialects.Maruku.block.block_meta = function block_meta( block, next ) { - // check if the last line of the block is an meta hash - var m = block.match( /(^|\n) {0,3}\{:\s*((?:\\\}|[^\}])*)\s*\}$/ ); - if ( !m ) return undefined; + attrs = { ref: children[0].toLowerCase(), original: orig.substr( 0, consumed ) }; + link = [ "link_ref", attrs, children[0] ]; + return [ consumed, link ]; + } - // process the meta hash - var attr = this.dialect.processMetaHash( m[ 2 ] ); + // Just consume the "[" + return [ 1, "[" ]; + }, - var hash; - // if we matched ^ then we need to apply meta to the previous block - if ( m[ 1 ] === "" ) { - var node = this.tree[ this.tree.length - 1 ]; - hash = extract_attr( node ); + "<": function autoLink( text ) { + var m; - // if the node is a string (rather than JsonML), bail - if ( typeof node === "string" ) return undefined; + if ( ( m = text.match( /^<(?:((https?|ftp|mailto):[^>]+)|(.*?@.*?\.[a-zA-Z]+))>/ ) ) !== null ) { + if ( m[3] ) + return [ m[0].length, [ "link", { href: "mailto:" + m[3] }, m[3] ] ]; + else if ( m[2] === "mailto" ) + return [ m[0].length, [ "link", { href: m[1] }, m[1].substr("mailto:".length ) ] ]; + else + return [ m[0].length, [ "link", { href: m[1] }, m[1] ] ]; + } - // create the attribute hash if it doesn't exist - if ( !hash ) { - hash = {}; - node.splice( 1, 0, hash ); - } + return [ 1, "<" ]; + }, - // add the attributes in - for ( a in attr ) { - hash[ a ] = attr[ a ]; - } + "`": function inlineCode( text ) { + // Inline code block. as many backticks as you like to start it + // Always skip over the opening ticks. + var m = text.match( /(`+)(([\s\S]*?)\1)/ ); - // return nothing so the meta hash is removed - return []; - } + if ( m && m[2] ) + return [ m[1].length + m[2].length, [ "inlinecode", m[3] ] ]; + else { + // TODO: No matching end code found - warn! + return [ 1, "`" ]; + } + }, - // pull the meta hash off the block and process what's left - var b = block.replace( /\n.*$/, "" ), - result = this.processBlock( b, [] ); + " \n": function lineBreak() { + return [ 3, [ "linebreak" ] ]; + } - // get or make the attributes hash - hash = extract_attr( result[ 0 ] ); - if ( !hash ) { - hash = {}; - result[ 0 ].splice( 1, 0, hash ); - } + } + }; - // attach the attributes to the block - for ( a in attr ) { - hash[ a ] = attr[ a ]; - } + // Meta Helper/generator method for em and strong handling + function strong_em( tag, md ) { - return result; -}; + var state_slot = tag + "_state", + other_slot = tag === "strong" ? "em_state" : "strong_state"; -Markdown.dialects.Maruku.block.definition_list = function definition_list( block, next ) { - // one or more terms followed by one or more definitions, in a single block - var tight = /^((?:[^\s:].*\n)+):\s+([\s\S]+)$/, - list = [ "dl" ], - i; - - // see if we're dealing with a tight or loose block - if ( ( m = block.match( tight ) ) ) { - // pull subsequent tight DL blocks out of `next` - var blocks = [ block ]; - while ( next.length && tight.exec( next[ 0 ] ) ) { - blocks.push( next.shift() ); + function CloseTag(len) { + this.len_after = len; + this.name = "close_" + md; } - for ( var b = 0; b < blocks.length; ++b ) { - var m = blocks[ b ].match( tight ), - terms = m[ 1 ].replace( /\n$/, "" ).split( /\n/ ), - defns = m[ 2 ].split( /\n:\s+/ ); + return function ( text ) { - // print( uneval( m ) ); + if ( this[state_slot][0] === md ) { + // Most recent em is of this type + //D:this.debug("closing", md); + this[state_slot].shift(); - for ( i = 0; i < terms.length; ++i ) { - list.push( [ "dt", terms[ i ] ] ); + // "Consume" everything to go back to the recrusion in the else-block below + return[ text.length, new CloseTag(text.length-md.length) ]; } + else { + // Store a clone of the em/strong states + var other = this[other_slot].slice(), + state = this[state_slot].slice(); - for ( i = 0; i < defns.length; ++i ) { - // run inline processing over the definition - list.push( [ "dd" ].concat( this.processInline( defns[ i ].replace( /(\n)\s+/, "$1" ) ) ) ); - } - } - } - else { - return undefined; - } + this[state_slot].unshift(md); - return [ list ]; -}; + //D:this.debug_indent += " "; -Markdown.dialects.Maruku.inline[ "{:" ] = function inline_meta( text, matches, out ) { - if ( !out.length ) { - return [ 2, "{:" ]; - } + // Recurse + var res = this.processInline( text.substr( md.length ) ); + //D:this.debug_indent = this.debug_indent.substr(2); - // get the preceeding element - var before = out[ out.length - 1 ]; + var last = res[res.length - 1]; - if ( typeof before === "string" ) { - return [ 2, "{:" ]; - } + //D:this.debug("processInline from", tag + ": ", uneval( res ) ); - // match a meta hash - var m = text.match( /^\{:\s*((?:\\\}|[^\}])*)\s*\}/ ); + var check = this[state_slot].shift(); + if ( last instanceof CloseTag ) { + res.pop(); + // We matched! Huzzah. + var consumed = text.length - last.len_after; + return [ consumed, [ tag ].concat(res) ]; + } + else { + // Restore the state of the other kind. We might have mistakenly closed it. + this[other_slot] = other; + this[state_slot] = state; - // no match, false alarm - if ( !m ) { - return [ 2, "{:" ]; + // We can't reuse the processed result as it could have wrong parsing contexts in it. + return [ md.length, md ]; + } + } + }; // End returned function } - // attach the attributes to the preceeding element - var meta = this.dialect.processMetaHash( m[ 1 ] ), - attr = extract_attr( before ); + Gruber.inline["**"] = strong_em("strong", "**"); + Gruber.inline["__"] = strong_em("strong", "__"); + Gruber.inline["*"] = strong_em("em", "*"); + Gruber.inline["_"] = strong_em("em", "_"); - if ( !attr ) { - attr = {}; - before.splice( 1, 0, attr ); - } + Markdown.dialects.Gruber = Gruber; + Markdown.buildBlockOrder ( Markdown.dialects.Gruber.block ); + Markdown.buildInlinePatterns( Markdown.dialects.Gruber.inline ); - for ( var k in meta ) { - attr[ k ] = meta[ k ]; - } - // cut out the string and replace it with nothing - return [ m[ 0 ].length, "" ]; -}; -Markdown.buildBlockOrder ( Markdown.dialects.Maruku.block ); -Markdown.buildInlinePatterns( Markdown.dialects.Maruku.inline ); + var Maruku = DialectHelpers.subclassDialect( Gruber ), + extract_attr = MarkdownHelpers.extract_attr, + forEach = MarkdownHelpers.forEach; -var isArray = Array.isArray || function(obj) { - return Object.prototype.toString.call(obj) == "[object Array]"; -}; + Maruku.processMetaHash = function processMetaHash( meta_string ) { + var meta = split_meta_hash( meta_string ), + attr = {}; -var forEach; -// Don't mess with Array.prototype. Its not friendly -if ( Array.prototype.forEach ) { - forEach = function( arr, cb, thisp ) { - return arr.forEach( cb, thisp ); + for ( var i = 0; i < meta.length; ++i ) { + // id: #foo + if ( /^#/.test( meta[ i ] ) ) + attr.id = meta[ i ].substring( 1 ); + // class: .foo + else if ( /^\./.test( meta[ i ] ) ) { + // if class already exists, append the new one + if ( attr["class"] ) + attr["class"] = attr["class"] + meta[ i ].replace( /./, " " ); + else + attr["class"] = meta[ i ].substring( 1 ); + } + // attribute: foo=bar + else if ( /\=/.test( meta[ i ] ) ) { + var s = meta[ i ].split( /\=/ ); + attr[ s[ 0 ] ] = s[ 1 ]; + } + } + + return attr; }; -} -else { - forEach = function(arr, cb, thisp) { - for (var i = 0; i < arr.length; i++) { - cb.call(thisp || arr, arr[i], i, arr); + + function split_meta_hash( meta_string ) { + var meta = meta_string.split( "" ), + parts = [ "" ], + in_quotes = false; + + while ( meta.length ) { + var letter = meta.shift(); + switch ( letter ) { + case " " : + // if we're in a quoted section, keep it + if ( in_quotes ) + parts[ parts.length - 1 ] += letter; + // otherwise make a new part + else + parts.push( "" ); + break; + case "'" : + case '"' : + // reverse the quotes and move straight on + in_quotes = !in_quotes; + break; + case "\\" : + // shift off the next letter to be used straight away. + // it was escaped so we'll keep it whatever it is + letter = meta.shift(); + /* falls through */ + default : + parts[ parts.length - 1 ] += letter; + break; + } } + + return parts; } -} -var isEmpty = function( obj ) { - for ( var key in obj ) { - if ( hasOwnProperty.call( obj, key ) ) { - return false; + Maruku.block.document_meta = function document_meta( block ) { + // we're only interested in the first block + if ( block.lineNumber > 1 ) + return undefined; + + // document_meta blocks consist of one or more lines of `Key: Value\n` + if ( ! block.match( /^(?:\w+:.*\n)*\w+:.*$/ ) ) + return undefined; + + // make an attribute node if it doesn't exist + if ( !extract_attr( this.tree ) ) + this.tree.splice( 1, 0, {} ); + + var pairs = block.split( /\n/ ); + for ( var p in pairs ) { + var m = pairs[ p ].match( /(\w+):\s*(.*)$/ ), + key = m[ 1 ].toLowerCase(), + value = m[ 2 ]; + + this.tree[ 1 ][ key ] = value; } - } - return true; -} + // document_meta produces no content! + return []; + }; -function extract_attr( jsonml ) { - return isArray(jsonml) - && jsonml.length > 1 - && typeof jsonml[ 1 ] === "object" - && !( isArray(jsonml[ 1 ]) ) - ? jsonml[ 1 ] - : undefined; -} + Maruku.block.block_meta = function block_meta( block ) { + // check if the last line of the block is an meta hash + var m = block.match( /(^|\n) {0,3}\{:\s*((?:\\\}|[^\}])*)\s*\}$/ ); + if ( !m ) + return undefined; + // process the meta hash + var attr = this.dialect.processMetaHash( m[ 2 ] ), + hash; + // if we matched ^ then we need to apply meta to the previous block + if ( m[ 1 ] === "" ) { + var node = this.tree[ this.tree.length - 1 ]; + hash = extract_attr( node ); -/** - * renderJsonML( jsonml[, options] ) -> String - * - jsonml (Array): JsonML array to render to XML - * - options (Object): options - * - * Converts the given JsonML into well-formed XML. - * - * The options currently understood are: - * - * - root (Boolean): wether or not the root node should be included in the - * output, or just its children. The default `false` is to not include the - * root itself. - */ -expose.renderJsonML = function( jsonml, options ) { - options = options || {}; - // include the root element in the rendered output? - options.root = options.root || false; + // if the node is a string (rather than JsonML), bail + if ( typeof node === "string" ) + return undefined; - var content = []; + // create the attribute hash if it doesn't exist + if ( !hash ) { + hash = {}; + node.splice( 1, 0, hash ); + } - if ( options.root ) { - content.push( render_tree( jsonml ) ); - } - else { - jsonml.shift(); // get rid of the tag - if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) ) { - jsonml.shift(); // get rid of the attributes + // add the attributes in + for ( var a in attr ) + hash[ a ] = attr[ a ]; + + // return nothing so the meta hash is removed + return []; } - while ( jsonml.length ) { - content.push( render_tree( jsonml.shift() ) ); + // pull the meta hash off the block and process what's left + var b = block.replace( /\n.*$/, "" ), + result = this.processBlock( b, [] ); + + // get or make the attributes hash + hash = extract_attr( result[ 0 ] ); + if ( !hash ) { + hash = {}; + result[ 0 ].splice( 1, 0, hash ); } - } - return content.join( "\n\n" ); -}; + // attach the attributes to the block + for ( var a in attr ) + hash[ a ] = attr[ a ]; -function escapeHTML( text ) { - return text.replace( /&/g, "&amp;" ) - .replace( /</g, "&lt;" ) - .replace( />/g, "&gt;" ) - .replace( /"/g, "&quot;" ) - .replace( /'/g, "&#39;" ); -} + return result; + }; -function render_tree( jsonml ) { - // basic case - if ( typeof jsonml === "string" ) { - return escapeHTML( jsonml ); - } + Maruku.block.definition_list = function definition_list( block, next ) { + // one or more terms followed by one or more definitions, in a single block + var tight = /^((?:[^\s:].*\n)+):\s+([\s\S]+)$/, + list = [ "dl" ], + i, m; - var tag = jsonml.shift(), - attributes = {}, - content = []; + // see if we're dealing with a tight or loose block + if ( ( m = block.match( tight ) ) ) { + // pull subsequent tight DL blocks out of `next` + var blocks = [ block ]; + while ( next.length && tight.exec( next[ 0 ] ) ) + blocks.push( next.shift() ); - if ( jsonml.length && typeof jsonml[ 0 ] === "object" && !( jsonml[ 0 ] instanceof Array ) ) { - attributes = jsonml.shift(); - } + for ( var b = 0; b < blocks.length; ++b ) { + var m = blocks[ b ].match( tight ), + terms = m[ 1 ].replace( /\n$/, "" ).split( /\n/ ), + defns = m[ 2 ].split( /\n:\s+/ ); - while ( jsonml.length ) { - content.push( render_tree( jsonml.shift() ) ); - } + // print( uneval( m ) ); - var tag_attrs = ""; - for ( var a in attributes ) { - tag_attrs += " " + a + '="' + escapeHTML( attributes[ a ] ) + '"'; - } + for ( i = 0; i < terms.length; ++i ) + list.push( [ "dt", terms[ i ] ] ); - // be careful about adding whitespace here for inline elements - if ( tag == "img" || tag == "br" || tag == "hr" ) { - return "<"+ tag + tag_attrs + "/>"; - } - else { - return "<"+ tag + tag_attrs + ">" + content.join( "" ) + "</" + tag + ">"; - } -} + for ( i = 0; i < defns.length; ++i ) { + // run inline processing over the definition + list.push( [ "dd" ].concat( this.processInline( defns[ i ].replace( /(\n)\s+/, "$1" ) ) ) ); + } + } + } + else { + return undefined; + } -function convert_tree_to_html( tree, references, options ) { - var i; - options = options || {}; + return [ list ]; + }; - // shallow clone - var jsonml = tree.slice( 0 ); + // splits on unescaped instances of @ch. If @ch is not a character the result + // can be unpredictable - if ( typeof options.preprocessTreeNode === "function" ) { - jsonml = options.preprocessTreeNode(jsonml, references); - } + Maruku.block.table = function table ( block ) { - // Clone attributes if they exist - var attrs = extract_attr( jsonml ); - if ( attrs ) { - jsonml[ 1 ] = {}; - for ( i in attrs ) { - jsonml[ 1 ][ i ] = attrs[ i ]; + var _split_on_unescaped = function( s, ch ) { + ch = ch || '\\s'; + if ( ch.match(/^[\\|\[\]{}?*.+^$]$/) ) + ch = '\\' + ch; + var res = [ ], + r = new RegExp('^((?:\\\\.|[^\\\\' + ch + '])*)' + ch + '(.*)'), + m; + while ( ( m = s.match( r ) ) ) { + res.push( m[1] ); + s = m[2]; + } + res.push(s); + return res; + }; + + var leading_pipe = /^ {0,3}\|(.+)\n {0,3}\|\s*([\-:]+[\-| :]*)\n((?:\s*\|.*(?:\n|$))*)(?=\n|$)/, + // find at least an unescaped pipe in each line + no_leading_pipe = /^ {0,3}(\S(?:\\.|[^\\|])*\|.*)\n {0,3}([\-:]+\s*\|[\-| :]*)\n((?:(?:\\.|[^\\|])*\|.*(?:\n|$))*)(?=\n|$)/, + i, + m; + if ( ( m = block.match( leading_pipe ) ) ) { + // remove leading pipes in contents + // (header and horizontal rule already have the leading pipe left out) + m[3] = m[3].replace(/^\s*\|/gm, ''); + } else if ( ! ( m = block.match( no_leading_pipe ) ) ) { + return undefined; } - attrs = jsonml[ 1 ]; - } - // basic case - if ( typeof jsonml === "string" ) { - return jsonml; - } + var table = [ "table", [ "thead", [ "tr" ] ], [ "tbody" ] ]; - // convert this node - switch ( jsonml[ 0 ] ) { - case "header": - jsonml[ 0 ] = "h" + jsonml[ 1 ].level; - delete jsonml[ 1 ].level; - break; - case "bulletlist": - jsonml[ 0 ] = "ul"; - break; - case "numberlist": - jsonml[ 0 ] = "ol"; - break; - case "listitem": - jsonml[ 0 ] = "li"; - break; - case "para": - jsonml[ 0 ] = "p"; - break; - case "markdown": - jsonml[ 0 ] = "html"; - if ( attrs ) delete attrs.references; - break; - case "code_block": - jsonml[ 0 ] = "pre"; - i = attrs ? 2 : 1; - var code = [ "code" ]; - code.push.apply( code, jsonml.splice( i, jsonml.length - i ) ); - jsonml[ i ] = code; - break; - case "inlinecode": - jsonml[ 0 ] = "code"; - break; - case "img": - jsonml[ 1 ].src = jsonml[ 1 ].href; - delete jsonml[ 1 ].href; - break; - case "linebreak": - jsonml[ 0 ] = "br"; - break; - case "link": - jsonml[ 0 ] = "a"; - break; - case "link_ref": - jsonml[ 0 ] = "a"; + // remove trailing pipes, then split on pipes + // (no escaped pipes are allowed in horizontal rule) + m[2] = m[2].replace(/\|\s*$/, '').split('|'); - // grab this ref and clean up the attribute node - var ref = references[ attrs.ref ]; + // process alignment + var html_attrs = [ ]; + forEach (m[2], function (s) { + if (s.match(/^\s*-+:\s*$/)) + html_attrs.push({align: "right"}); + else if (s.match(/^\s*:-+\s*$/)) + html_attrs.push({align: "left"}); + else if (s.match(/^\s*:-+:\s*$/)) + html_attrs.push({align: "center"}); + else + html_attrs.push({}); + }); - // if the reference exists, make the link - if ( ref ) { - delete attrs.ref; + // now for the header, avoid escaped pipes + m[1] = _split_on_unescaped(m[1].replace(/\|\s*$/, ''), '|'); + for (i = 0; i < m[1].length; i++) { + table[1][1].push(['th', html_attrs[i] || {}].concat( + this.processInline(m[1][i].trim()))); + } - // add in the href and title, if present - attrs.href = ref.href; - if ( ref.title ) { - attrs.title = ref.title; - } + // now for body contents + forEach (m[3].replace(/\|\s*$/mg, '').split('\n'), function (row) { + var html_row = ['tr']; + row = _split_on_unescaped(row, '|'); + for (i = 0; i < row.length; i++) + html_row.push(['td', html_attrs[i] || {}].concat(this.processInline(row[i].trim()))); + table[2].push(html_row); + }, this); - // get rid of the unneeded original text - delete attrs.original; - } - // the reference doesn't exist, so revert to plain text - else { - return attrs.original; - } - break; - case "img_ref": - jsonml[ 0 ] = "img"; + return [table]; + }; - // grab this ref and clean up the attribute node - var ref = references[ attrs.ref ]; + Maruku.inline[ "{:" ] = function inline_meta( text, matches, out ) { + if ( !out.length ) + return [ 2, "{:" ]; - // if the reference exists, make the link - if ( ref ) { - delete attrs.ref; + // get the preceeding element + var before = out[ out.length - 1 ]; - // add in the href and title, if present - attrs.src = ref.href; - if ( ref.title ) { - attrs.title = ref.title; - } + if ( typeof before === "string" ) + return [ 2, "{:" ]; - // get rid of the unneeded original text - delete attrs.original; - } - // the reference doesn't exist, so revert to plain text - else { - return attrs.original; - } - break; - } + // match a meta hash + var m = text.match( /^\{:\s*((?:\\\}|[^\}])*)\s*\}/ ); - // convert all the children - i = 1; + // no match, false alarm + if ( !m ) + return [ 2, "{:" ]; - // deal with the attribute node, if it exists - if ( attrs ) { - // if there are keys, skip over it - for ( var key in jsonml[ 1 ] ) { - i = 2; + // attach the attributes to the preceeding element + var meta = this.dialect.processMetaHash( m[ 1 ] ), + attr = extract_attr( before ); + + if ( !attr ) { + attr = {}; + before.splice( 1, 0, attr ); } - // if there aren't, remove it - if ( i === 1 ) { - jsonml.splice( i, 1 ); - } - } - for ( ; i < jsonml.length; ++i ) { - jsonml[ i ] = convert_tree_to_html( jsonml[ i ], references, options ); - } + for ( var k in meta ) + attr[ k ] = meta[ k ]; - return jsonml; -} + // cut out the string and replace it with nothing + return [ m[ 0 ].length, "" ]; + }; -// merges adjacent text nodes into a single node -function merge_text_nodes( jsonml ) { - // skip the tag name and attribute hash - var i = extract_attr( jsonml ) ? 2 : 1; + Markdown.dialects.Maruku = Maruku; + Markdown.dialects.Maruku.inline.__escape__ = /^\\[\\`\*_{}\[\]()#\+.!\-|:]/; + Markdown.buildBlockOrder ( Markdown.dialects.Maruku.block ); + Markdown.buildInlinePatterns( Markdown.dialects.Maruku.inline ); - while ( i < jsonml.length ) { - // if it's a string check the next item too - if ( typeof jsonml[ i ] === "string" ) { - if ( i + 1 < jsonml.length && typeof jsonml[ i + 1 ] === "string" ) { - // merge the second string into the first and remove it - jsonml[ i ] += jsonml.splice( i + 1, 1 )[ 0 ]; - } - else { - ++i; - } - } - // if it's not a string recurse - else { - merge_text_nodes( jsonml[ i ] ); - ++i; - } - } -} -} )( (function() { - if ( typeof exports === "undefined" ) { - window.markdown = {}; - return window.markdown; - } - else { - return exports; - } -} )() ); +// Include all our depndencies and; + expose.Markdown = Markdown; + expose.parse = Markdown.parse; + expose.toHTML = Markdown.toHTML; + expose.toHTMLTree = Markdown.toHTMLTree; + expose.renderJsonML = Markdown.renderJsonML; + +})(function() { + window.markdown = {}; + return window.markdown; +}());