markdown.c in redcarpet-2.2.0

- old
+ new

@@ -23,10 +23,14 @@
 #include <assert.h>
 #include <string.h>
 #include <ctype.h>
 #include <stdio.h>
 
+#if defined(_WIN32)
+#define strncasecmp	_strnicmp
+#endif
+
 #define REF_TABLE_SIZE 8
 
 #define BUFFER_BLOCK 0
 #define BUFFER_SPAN 1
 
@@ -495,11 +499,11 @@
 		if (i >= size) return 0;
 
 		if (data[i] == c && !_isspace(data[i - 1])) {
 
 			if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
-				if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1])))
+				if (i + 1 < size && isalnum(data[i + 1]))	/* bound the peeked byte (was `i + i`); a closer glued to a word char is intra-word */
 					continue;
 			}
 
 			work = rndr_newbuf(rndr, BUFFER_SPAN);
 			parse_inline(work, rndr, data, i);
@@ -590,10 +594,15 @@
 char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
 {
 	uint8_t c = data[0];
 	size_t ret;
 
+	if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
+		if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>')
+			return 0;
+	}
+
 	if (size > 2 && data[1] != c) {
 		/* whitespace cannot follow an opening emphasis;
 		 * strikethrough only takes two characters '~~' */
 		if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
 			return 0;
@@ -765,11 +774,11 @@
 	if (!rndr->cb.link || rndr->in_link_body)
 		return 0;
 
 	link = rndr_newbuf(rndr, BUFFER_SPAN);
 
-	if ((link_len = sd_autolink__www(&rewind, link, data, offset, size)) > 0) {
+	if ((link_len = sd_autolink__www(&rewind, link, data, offset, size, 0)) > 0) {
 		link_url = rndr_newbuf(rndr, BUFFER_SPAN);
 		BUFPUTSL(link_url, "http://");
 		bufput(link_url, link->data, link->size);
 
 		ob->size -= rewind;
@@ -797,11 +806,11 @@
 	if (!rndr->cb.autolink || rndr->in_link_body)
 		return 0;
 
 	link = rndr_newbuf(rndr, BUFFER_SPAN);
 
-	if ((link_len = sd_autolink__email(&rewind, link, data, offset, size)) > 0) {
+	if ((link_len = sd_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
 		ob->size -= rewind;
 		rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
 	}
 
 	rndr_popbuf(rndr, BUFFER_SPAN);
@@ -817,11 +826,11 @@
 	if (!rndr->cb.autolink || rndr->in_link_body)
 		return 0;
 
 	link = rndr_newbuf(rndr, BUFFER_SPAN);
 
-	if ((link_len = sd_autolink__url(&rewind, link, data, offset, size)) > 0) {
+	if ((link_len = sd_autolink__url(&rewind, link, data, offset, size, 0)) > 0) {
 		ob->size -= rewind;
 		rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
 	}
 
 	rndr_popbuf(rndr, BUFFER_SPAN);
@@ -1148,13 +1157,14 @@
 	}
 
 	return n >= 3;
 }
 
-/* check if a line is a code fence; return its size if it is */
+/* check if a line begins with a code fence; return the
+ * width of the code fence */
 static size_t
-is_codefence(uint8_t *data, size_t size, struct buf *syntax)
+prefix_codefence(uint8_t *data, size_t size)
 {
 	size_t i = 0, n = 0;
 	uint8_t c;
 
 	/* skipping initial spaces */
@@ -1175,47 +1185,60 @@
 	}
 
 	if (n < 3)
 		return 0;
 
-	if (syntax != NULL) {
-		size_t syn = 0;
+	return i;
+}
 
-		while (i < size && data[i] == ' ')
-			i++;
+/* check if a line is a code fence; return its size if it is */
+static size_t
+is_codefence(uint8_t *data, size_t size, struct buf *syntax)
+{
+	size_t i = 0, syn_len = 0;
+	uint8_t *syn_start;
 
-		syntax->data = data + i;
+	i = prefix_codefence(data, size);
+	if (i == 0)
+		return 0;
 
-		if (i < size && data[i] == '{') {
-			i++; syntax->data++;
+	while (i < size && data[i] == ' ')
+		i++;
 
-			while (i < size && data[i] != '}' && data[i] != '\n') {
-				syn++; i++;
-			}
+	syn_start = data + i;
 
-			if (i == size || data[i] != '}')
-				return 0;
+	if (i < size && data[i] == '{') {
+		i++; syn_start++;
 
-			/* strip all whitespace at the beginning and the end
-			 * of the {} block */
-			while (syn > 0 && _isspace(syntax->data[0])) {
-				syntax->data++; syn--;
-			}
+		while (i < size && data[i] != '}' && data[i] != '\n') {
+			syn_len++; i++;
+		}
 
-			while (syn > 0 && _isspace(syntax->data[syn - 1]))
-				syn--;
+		if (i == size || data[i] != '}')
+			return 0;
 
-			i++;
-		} else {
-			while (i < size && !_isspace(data[i])) {
-				syn++; i++;
-			}
+		/* strip all whitespace at the beginning and the end
+		 * of the {} block */
+		while (syn_len > 0 && _isspace(syn_start[0])) {
+			syn_start++; syn_len--;
 		}
 
-		syntax->size = syn;
+		while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
+			syn_len--;
+
+		i++;
+	} else {
+		while (i < size && !_isspace(data[i])) {
+			syn_len++; i++;
+		}
 	}
 
+	if (syntax) {
+		syntax->data = syn_start;
+		syntax->size = syn_len;
+	}
+
 	while (i < size && data[i] != '\n') {
 		if (!_isspace(data[i]))
 			return 0;
 
 		i++;
@@ -1414,23 +1437,52 @@
 	struct buf work = { data, 0, 0, 0 };
 
 	while (i < size) {
 		for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
 
-		if (is_empty(data + i, size - i) || (level = is_headerline(data + i, size - i)) != 0)
+		if (is_empty(data + i, size - i))
 			break;
 
-		if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
-			if (data[i] == '<' && rndr->cb.blockhtml && parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
+		if ((level = is_headerline(data + i, size - i)) != 0)
+			break;
+
+		if (is_atxheader(rndr, data + i, size - i) ||
+			is_hrule(data + i, size - i) ||
+			prefix_quote(data + i, size - i)) {
+			end = i;
+			break;
+		}
+
+		/*
+		 * Early termination of a paragraph with the same logic
+		 * as Markdown 1.0.0. If this logic is applied, the
+		 * Markdown 1.0.3 test suite won't pass cleanly
+		 *
+		 * :: If the first character in a new line is not a letter,
+		 * let's check to see if there's some kind of block starting
+		 * here
+		 */
+		if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) {
+			if (prefix_oli(data + i, size - i) ||
+				prefix_uli(data + i, size - i)) {
 				end = i;
 				break;
 			}
-		}
 
-		if (is_atxheader(rndr, data + i, size - i) || is_hrule(data + i, size - i)) {
-			end = i;
-			break;
+			/* see if an html block starts here */
+			if (data[i] == '<' && rndr->cb.blockhtml &&
+				parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
+				end = i;
+				break;
+			}
+
+			/* see if a code fence starts here */
+			if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
+				is_codefence(data + i, size - i, NULL) != 0) {
+				end = i;
+				break;
+			}
 		}
 
 		i = end;
 	}
 
@@ -1498,13 +1550,14 @@
 
 	work = rndr_newbuf(rndr, BUFFER_BLOCK);
 
 	while (beg < size) {
 		size_t fence_end;
+		struct buf fence_trail = { 0, 0, 0, 0 };
 
-		fence_end = is_codefence(data + beg, size - beg, NULL);
-		if (fence_end != 0) {
+		fence_end = is_codefence(data + beg, size - beg, &fence_trail);
+		if (fence_end != 0 && fence_trail.size == 0) {
 			beg += fence_end;
 			break;
 		}
 
 		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
@@ -1575,12 +1628,11 @@
 static size_t
 parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
 {
 	struct buf *work = 0, *inter = 0;
 	size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
-	int in_empty = 0, has_inside_empty = 0;
-	size_t has_next_uli, has_next_oli;
+	int in_empty = 0, has_inside_empty = 0, in_fence = 0;
 
 	/* keeping track of the first indentation prefix */
 	while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
 		orgpre++;
 
@@ -1604,10 +1656,12 @@
 	bufput(work, data + beg, end - beg);
 	beg = end;
 
 	/* process the following lines */
 	while (beg < size) {
+		size_t has_next_uli = 0, has_next_oli = 0;
+
 		end++;
 
 		while (end < size && data[end - 1] != '\n')
 			end++;
 
@@ -1623,13 +1677,22 @@
 		while (i < 4 && beg + i < end && data[beg + i] == ' ')
 			i++;
 
 		pre = i;
 
-		has_next_uli = prefix_uli(data + beg + i, end - beg - i);
-		has_next_oli = prefix_oli(data + beg + i, end - beg - i);
+		if (rndr->ext_flags & MKDEXT_FENCED_CODE) {
+			if (is_codefence(data + beg + i, end - beg - i, NULL) != 0)
+				in_fence = !in_fence;
+		}
 
+		/* Only check for new list items if we are **not** inside
+		 * a fenced code block */
+		if (!in_fence) {
+			has_next_uli = prefix_uli(data + beg + i, end - beg - i);
+			has_next_oli = prefix_oli(data + beg + i, end - beg - i);
+		}
+
 		/* checking for ul/ol switch */
 		if (in_empty && (
 			((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
 			(!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
 			*flags |= MKD_LI_END;
@@ -1645,14 +1708,16 @@
 				break;             /* the same indentation */
 
 			if (!sublist)
 				sublist = work->size;
 		}
-		/* joining only indented stuff after empty lines */
-		else if (in_empty && i < 4) {
-				*flags |= MKD_LI_END;
-				break;
+		/* joining only indented stuff after empty lines;
+		 * note that now we only require 1 space of indentation
+		 * to continue a list */
+		else if (in_empty && pre == 0) {
+			*flags |= MKD_LI_END;
+			break;
 		}
 		else if (in_empty) {
 			bufputc(work, '\n');
 			has_inside_empty = 1;
 		}
@@ -1756,11 +1821,16 @@
 
 
-/* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
+/* htmlblock_end_tag • checking end of HTML block : </tag>[ \t]*\n([ \t]*\n)? */
 /*	returns the length on match, 0 otherwise */
 static size_t
-htmlblock_end(const char *tag, size_t tag_len, struct sd_markdown *rndr, uint8_t *data, size_t size)
+htmlblock_end_tag(
+	const char *tag,
+	size_t tag_len,
+	struct sd_markdown *rndr,
+	uint8_t *data,
+	size_t size)
 {
 	size_t i, w;
 
 	/* checking if tag is a match */
 	if (tag_len + 3 >= size ||
@@ -1774,29 +1844,64 @@
 	if (i < size && (w = is_empty(data + i, size - i)) == 0)
 		return 0; /* non-blank after tag */
 	i += w;
 	w = 0;
 
-	if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
-		if (i < size)
-			w = is_empty(data + i, size - i);
-	} else  {
-		if (i < size && (w = is_empty(data + i, size - i)) == 0)
-			return 0; /* non-blank line after tag line */
-	}
+	if (i < size)
+		w = is_empty(data + i, size - i);
 
 	return i + w;
 }
 
+static size_t	/* htmlblock_end • scan data for a closing </curtag>; returns the offset in data where the match ends, 0 if none */
+htmlblock_end(const char *curtag,	/* tag name to close; must be NUL-terminated (strlen is taken below) */
+	struct sd_markdown *rndr,	/* only forwarded to htmlblock_end_tag */
+	uint8_t *data,
+	size_t size,
+	int start_of_line)	/* non-zero: after the first line, accept "</" only directly after a newline */
+{
+	size_t tag_size = strlen(curtag);
+	size_t i = 1, end_tag;	/* the scan begins past the first byte of data */
+	int block_lines = 0;	/* newlines stepped over so far */
 
+	while (i < size) {
+		i++;
+		while (i < size && !(data[i - 1] == '<' && data[i] == '/')) {	/* advance until data[i-1..i] is "</" */
+			if (data[i] == '\n')
+				block_lines++;
+
+			i++;
+		}
+
+		/* If we are only looking for unindented tags, skip the tag
+		 * if it doesn't follow a newline (data[i - 2] is the byte
+		 * just before the '<').
+		 * The only exception to this is if the tag is still on the
+		 * initial line (block_lines == 0); in that case it still
+		 * counts as a closing tag
+		 */
+		if (start_of_line && block_lines > 0 && data[i - 2] != '\n')
+			continue;
+
+		if (i + 2 + tag_size >= size)	/* too close to the end of data to test this candidate: stop */
+			break;
+
+		end_tag = htmlblock_end_tag(curtag, tag_size, rndr, data + i - 1, size - i + 1);	/* data + i - 1 is the '<' */
+		if (end_tag)
+			return i + end_tag - 1;	/* end_tag is relative to the '<' at i - 1; rebase it onto data */
+	}
+
+	return 0;	/* no acceptable closing tag found */
+}
+
+
 /* parse_htmlblock • parsing of inline HTML block */
 static size_t
 parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
 {
-	size_t i, j = 0;
+	size_t i, j = 0, tag_end;
 	const char *curtag = NULL;
-	int found;
 	struct buf work = { data, 0, 0, 0 };
 
 	/* identification of the opening tag */
 	if (size < 2 || data[0] != '<')
 		return 0;
@@ -1853,44 +1958,27 @@
 		return 0;
 	}
 
 	/* looking for an unindented matching closing tag */
 	/*	followed by a blank line */
-	i = 1;
-	found = 0;
+	tag_end = htmlblock_end(curtag, rndr, data, size, 1);
 
 	/* if not found, trying a second pass looking for indented match */
 	/* but not if tag is "ins" or "del" (following original Markdown.pl) */
-	if (strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
-		size_t tag_size = strlen(curtag);
-		i = 1;
-		while (i < size) {
-			i++;
-			while (i < size && !(data[i - 1] == '<' && data[i] == '/'))
-				i++;
-
-			if (i + 2 + tag_size >= size)
-				break;
-
-			j = htmlblock_end(curtag, tag_size, rndr, data + i - 1, size - i + 1);
-
-			if (j) {
-				i += j - 1;
-				found = 1;
-				break;
-			}
-		}
+	if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
+		tag_end = htmlblock_end(curtag, rndr, data, size, 0);
 	}
 
-	if (!found) return 0;
+	if (!tag_end)
+		return 0;
 
 	/* the end of the block has been found */
-	work.size = i;
+	work.size = tag_end;
 	if (do_render && rndr->cb.blockhtml)
 		rndr->cb.blockhtml(ob, &work, rndr->opaque);
 
-	return i;
+	return tag_end;
 }
 
 static void
 parse_table_row(
 	struct buf *ob,
@@ -2458,11 +2546,11 @@
 }
 
 void
 sd_version(int *ver_major, int *ver_minor, int *ver_revision)
 {
-	*ver_major = UPSKIRT_VER_MAJOR;
-	*ver_minor = UPSKIRT_VER_MINOR;
-	*ver_revision = UPSKIRT_VER_REVISION;
+	*ver_major = SUNDOWN_VER_MAJOR;	/* all three out-pointers are written unconditionally, so none may be NULL */
+	*ver_minor = SUNDOWN_VER_MINOR;
+	*ver_revision = SUNDOWN_VER_REVISION;
 }
 
 /* vim: set filetype=c: */