ext/redcarpet/markdown.c in redcarpet-2.1.1 vs ext/redcarpet/markdown.c in redcarpet-2.2.0
- old
+ new
@@ -23,10 +23,14 @@
#include <assert.h>
#include <string.h>
#include <ctype.h>
#include <stdio.h>
+#if defined(_WIN32)
+#define strncasecmp _strnicmp
+#endif
+
#define REF_TABLE_SIZE 8
#define BUFFER_BLOCK 0
#define BUFFER_SPAN 1
@@ -495,11 +499,11 @@
if (i >= size) return 0;
if (data[i] == c && !_isspace(data[i - 1])) {
if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
- if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1])))
+ if (i + i < size && isalnum(data[i + 1]))
continue;
}
work = rndr_newbuf(rndr, BUFFER_SPAN);
parse_inline(work, rndr, data, i);
@@ -590,10 +594,15 @@
char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size)
{
uint8_t c = data[0];
size_t ret;
+ if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) {
+ if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>')
+ return 0;
+ }
+
if (size > 2 && data[1] != c) {
/* whitespace cannot follow an opening emphasis;
* strikethrough only takes two characters '~~' */
if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0)
return 0;
@@ -765,11 +774,11 @@
if (!rndr->cb.link || rndr->in_link_body)
return 0;
link = rndr_newbuf(rndr, BUFFER_SPAN);
- if ((link_len = sd_autolink__www(&rewind, link, data, offset, size)) > 0) {
+ if ((link_len = sd_autolink__www(&rewind, link, data, offset, size, 0)) > 0) {
link_url = rndr_newbuf(rndr, BUFFER_SPAN);
BUFPUTSL(link_url, "http://");
bufput(link_url, link->data, link->size);
ob->size -= rewind;
@@ -797,11 +806,11 @@
if (!rndr->cb.autolink || rndr->in_link_body)
return 0;
link = rndr_newbuf(rndr, BUFFER_SPAN);
- if ((link_len = sd_autolink__email(&rewind, link, data, offset, size)) > 0) {
+ if ((link_len = sd_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
ob->size -= rewind;
rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque);
}
rndr_popbuf(rndr, BUFFER_SPAN);
@@ -817,11 +826,11 @@
if (!rndr->cb.autolink || rndr->in_link_body)
return 0;
link = rndr_newbuf(rndr, BUFFER_SPAN);
- if ((link_len = sd_autolink__url(&rewind, link, data, offset, size)) > 0) {
+ if ((link_len = sd_autolink__url(&rewind, link, data, offset, size, 0)) > 0) {
ob->size -= rewind;
rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque);
}
rndr_popbuf(rndr, BUFFER_SPAN);
@@ -1148,13 +1157,14 @@
}
return n >= 3;
}
-/* check if a line is a code fence; return its size if it is */
+/* check if a line begins with a code fence; return the
+ * width of the code fence */
static size_t
-is_codefence(uint8_t *data, size_t size, struct buf *syntax)
+prefix_codefence(uint8_t *data, size_t size)
{
size_t i = 0, n = 0;
uint8_t c;
/* skipping initial spaces */
@@ -1175,47 +1185,60 @@
}
if (n < 3)
return 0;
- if (syntax != NULL) {
- size_t syn = 0;
+ return i;
+}
- while (i < size && data[i] == ' ')
- i++;
+/* check if a line is a code fence; return its size if it is */
+static size_t
+is_codefence(uint8_t *data, size_t size, struct buf *syntax)
+{
+ size_t i = 0, syn_len = 0;
+ uint8_t *syn_start;
- syntax->data = data + i;
+ i = prefix_codefence(data, size);
+ if (i == 0)
+ return 0;
- if (i < size && data[i] == '{') {
- i++; syntax->data++;
+ while (i < size && data[i] == ' ')
+ i++;
- while (i < size && data[i] != '}' && data[i] != '\n') {
- syn++; i++;
- }
+ syn_start = data + i;
- if (i == size || data[i] != '}')
- return 0;
+ if (i < size && data[i] == '{') {
+ i++; syn_start++;
- /* strip all whitespace at the beginning and the end
- * of the {} block */
- while (syn > 0 && _isspace(syntax->data[0])) {
- syntax->data++; syn--;
- }
+ while (i < size && data[i] != '}' && data[i] != '\n') {
+ syn_len++; i++;
+ }
- while (syn > 0 && _isspace(syntax->data[syn - 1]))
- syn--;
+ if (i == size || data[i] != '}')
+ return 0;
- i++;
- } else {
- while (i < size && !_isspace(data[i])) {
- syn++; i++;
- }
+ /* strip all whitespace at the beginning and the end
+ * of the {} block */
+ while (syn_len > 0 && _isspace(syn_start[0])) {
+ syn_start++; syn_len--;
}
- syntax->size = syn;
+ while (syn_len > 0 && _isspace(syn_start[syn_len - 1]))
+ syn_len--;
+
+ i++;
+ } else {
+ while (i < size && !_isspace(data[i])) {
+ syn_len++; i++;
+ }
}
+ if (syntax) {
+ syntax->data = syn_start;
+ syntax->size = syn_len;
+ }
+
while (i < size && data[i] != '\n') {
if (!_isspace(data[i]))
return 0;
i++;
@@ -1414,23 +1437,52 @@
struct buf work = { data, 0, 0, 0 };
while (i < size) {
for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
- if (is_empty(data + i, size - i) || (level = is_headerline(data + i, size - i)) != 0)
+ if (is_empty(data + i, size - i))
break;
- if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
- if (data[i] == '<' && rndr->cb.blockhtml && parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
+ if ((level = is_headerline(data + i, size - i)) != 0)
+ break;
+
+ if (is_atxheader(rndr, data + i, size - i) ||
+ is_hrule(data + i, size - i) ||
+ prefix_quote(data + i, size - i)) {
+ end = i;
+ break;
+ }
+
+ /*
+ * Early termination of a paragraph with the same logic
+ * as Markdown 1.0.0. If this logic is applied, the
+ * Markdown 1.0.3 test suite won't pass cleanly
+ *
+ * :: If the first character in a new line is not a letter,
+ * let's check to see if there's some kind of block starting
+ * here
+ */
+ if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) {
+ if (prefix_oli(data + i, size - i) ||
+ prefix_uli(data + i, size - i)) {
end = i;
break;
}
- }
- if (is_atxheader(rndr, data + i, size - i) || is_hrule(data + i, size - i)) {
- end = i;
- break;
+ /* see if an html block starts here */
+ if (data[i] == '<' && rndr->cb.blockhtml &&
+ parse_htmlblock(ob, rndr, data + i, size - i, 0)) {
+ end = i;
+ break;
+ }
+
+ /* see if a code fence starts here */
+ if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 &&
+ is_codefence(data + i, size - i, NULL) != 0) {
+ end = i;
+ break;
+ }
}
i = end;
}
@@ -1498,13 +1550,14 @@
work = rndr_newbuf(rndr, BUFFER_BLOCK);
while (beg < size) {
size_t fence_end;
+ struct buf fence_trail = { 0, 0, 0, 0 };
- fence_end = is_codefence(data + beg, size - beg, NULL);
- if (fence_end != 0) {
+ fence_end = is_codefence(data + beg, size - beg, &fence_trail);
+ if (fence_end != 0 && fence_trail.size == 0) {
beg += fence_end;
break;
}
for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
@@ -1575,12 +1628,11 @@
static size_t
parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags)
{
struct buf *work = 0, *inter = 0;
size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
- int in_empty = 0, has_inside_empty = 0;
- size_t has_next_uli, has_next_oli;
+ int in_empty = 0, has_inside_empty = 0, in_fence = 0;
/* keeping track of the first indentation prefix */
while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
orgpre++;
@@ -1604,10 +1656,12 @@
bufput(work, data + beg, end - beg);
beg = end;
/* process the following lines */
while (beg < size) {
+ size_t has_next_uli = 0, has_next_oli = 0;
+
end++;
while (end < size && data[end - 1] != '\n')
end++;
@@ -1623,13 +1677,22 @@
while (i < 4 && beg + i < end && data[beg + i] == ' ')
i++;
pre = i;
- has_next_uli = prefix_uli(data + beg + i, end - beg - i);
- has_next_oli = prefix_oli(data + beg + i, end - beg - i);
+ if (rndr->ext_flags & MKDEXT_FENCED_CODE) {
+ if (is_codefence(data + beg + i, end - beg - i, NULL) != 0)
+ in_fence = !in_fence;
+ }
+ /* Only check for new list items if we are **not** inside
+ * a fenced code block */
+ if (!in_fence) {
+ has_next_uli = prefix_uli(data + beg + i, end - beg - i);
+ has_next_oli = prefix_oli(data + beg + i, end - beg - i);
+ }
+
/* checking for ul/ol switch */
if (in_empty && (
((*flags & MKD_LIST_ORDERED) && has_next_uli) ||
(!(*flags & MKD_LIST_ORDERED) && has_next_oli))){
*flags |= MKD_LI_END;
@@ -1645,14 +1708,16 @@
break; /* the same indentation */
if (!sublist)
sublist = work->size;
}
- /* joining only indented stuff after empty lines */
- else if (in_empty && i < 4) {
- *flags |= MKD_LI_END;
- break;
+ /* joining only indented stuff after empty lines;
+ * note that now we only require 1 space of indentation
+ * to continue a list */
+ else if (in_empty && pre == 0) {
+ *flags |= MKD_LI_END;
+ break;
}
else if (in_empty) {
bufputc(work, '\n');
has_inside_empty = 1;
}
@@ -1756,11 +1821,16 @@
/* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */
/* returns the length on match, 0 otherwise */
static size_t
-htmlblock_end(const char *tag, size_t tag_len, struct sd_markdown *rndr, uint8_t *data, size_t size)
+htmlblock_end_tag(
+ const char *tag,
+ size_t tag_len,
+ struct sd_markdown *rndr,
+ uint8_t *data,
+ size_t size)
{
size_t i, w;
/* checking if tag is a match */
if (tag_len + 3 >= size ||
@@ -1774,29 +1844,64 @@
if (i < size && (w = is_empty(data + i, size - i)) == 0)
return 0; /* non-blank after tag */
i += w;
w = 0;
- if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) {
- if (i < size)
- w = is_empty(data + i, size - i);
- } else {
- if (i < size && (w = is_empty(data + i, size - i)) == 0)
- return 0; /* non-blank line after tag line */
- }
+ if (i < size)
+ w = is_empty(data + i, size - i);
return i + w;
}
+static size_t /* htmlblock_end • scan data for a closing tag of curtag; returns the offset from data just past the match, 0 if none */
+htmlblock_end(const char *curtag,
+ struct sd_markdown *rndr,
+ uint8_t *data,
+ size_t size,
+ int start_of_line) /* non-zero: after the first line, only accept "</" that directly follows a newline */
+{
+ size_t tag_size = strlen(curtag); /* computed once, reused for every candidate */
+ size_t i = 1, end_tag; /* i is incremented before any read, so it is >= 2 whenever data[i - 2] is used */
+ int block_lines = 0; /* number of '\n' bytes stepped over by the inner scan so far */
+ while (i < size) {
+ i++; /* always advance, so a rejected candidate is not examined again */
+ while (i < size && !(data[i - 1] == '<' && data[i] == '/')) { /* stop where data[i - 1], data[i] spell "</", or at end of input */
+ if (data[i] == '\n')
+ block_lines++;
+
+ i++;
+ }
+
+ /* If we are only looking for unindented tags, skip the tag
+ * if it doesn't follow a newline.
+ *
+ * The only exception to this is if the tag is still on the
+ * initial line; in that case it still counts as a closing
+ * tag
+ */
+ if (start_of_line && block_lines > 0 && data[i - 2] != '\n') /* data[i - 2] is the byte just before the '<' */
+ continue;
+
+ if (i + 2 + tag_size >= size) /* too close to the end of the input; stop searching */
+ break;
+
+ end_tag = htmlblock_end_tag(curtag, tag_size, rndr, data + i - 1, size - i + 1); /* data + i - 1 points at the '<' of the candidate */
+ if (end_tag)
+ return i + end_tag - 1; /* turn the length measured from '<' into an offset from data */
+ }
+
+ return 0; /* no acceptable closing tag was found */
+}
+
+
/* parse_htmlblock • parsing of inline HTML block */
static size_t
parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render)
{
- size_t i, j = 0;
+ size_t i, j = 0, tag_end;
const char *curtag = NULL;
- int found;
struct buf work = { data, 0, 0, 0 };
/* identification of the opening tag */
if (size < 2 || data[0] != '<')
return 0;
@@ -1853,44 +1958,27 @@
return 0;
}
/* looking for an unindented matching closing tag */
/* followed by a blank line */
- i = 1;
- found = 0;
+ tag_end = htmlblock_end(curtag, rndr, data, size, 1);
/* if not found, trying a second pass looking for indented match */
/* but not if tag is "ins" or "del" (following original Markdown.pl) */
- if (strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
- size_t tag_size = strlen(curtag);
- i = 1;
- while (i < size) {
- i++;
- while (i < size && !(data[i - 1] == '<' && data[i] == '/'))
- i++;
-
- if (i + 2 + tag_size >= size)
- break;
-
- j = htmlblock_end(curtag, tag_size, rndr, data + i - 1, size - i + 1);
-
- if (j) {
- i += j - 1;
- found = 1;
- break;
- }
- }
+ if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) {
+ tag_end = htmlblock_end(curtag, rndr, data, size, 0);
}
- if (!found) return 0;
+ if (!tag_end)
+ return 0;
/* the end of the block has been found */
- work.size = i;
+ work.size = tag_end;
if (do_render && rndr->cb.blockhtml)
rndr->cb.blockhtml(ob, &work, rndr->opaque);
- return i;
+ return tag_end;
}
static void
parse_table_row(
struct buf *ob,
@@ -2458,11 +2546,11 @@
}
void
sd_version(int *ver_major, int *ver_minor, int *ver_revision)
{
- *ver_major = UPSKIRT_VER_MAJOR;
- *ver_minor = UPSKIRT_VER_MINOR;
- *ver_revision = UPSKIRT_VER_REVISION;
+ *ver_major = SUNDOWN_VER_MAJOR; /* each out-parameter receives the matching SUNDOWN_VER_* constant; */
+ *ver_minor = SUNDOWN_VER_MINOR; /* the pointers are dereferenced without a NULL check, */
+ *ver_revision = SUNDOWN_VER_REVISION; /* so callers must pass three valid int pointers */
}
/* vim: set filetype=c: */