ext/redcarpet/markdown.c in redcarpet-2.1.1 vs ext/redcarpet/markdown.c in redcarpet-2.2.0

- old
+ new

@@ -23,10 +23,14 @@ #include <assert.h> #include <string.h> #include <ctype.h> #include <stdio.h> +#if defined(_WIN32) +#define strncasecmp _strnicmp +#endif + #define REF_TABLE_SIZE 8 #define BUFFER_BLOCK 0 #define BUFFER_SPAN 1 @@ -495,11 +499,11 @@ if (i >= size) return 0; if (data[i] == c && !_isspace(data[i - 1])) { if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) { - if (!(i + 1 == size || _isspace(data[i + 1]) || ispunct(data[i + 1]))) + if (i + i < size && isalnum(data[i + 1])) continue; } work = rndr_newbuf(rndr, BUFFER_SPAN); parse_inline(work, rndr, data, i); @@ -590,10 +594,15 @@ char_emphasis(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t offset, size_t size) { uint8_t c = data[0]; size_t ret; + if (rndr->ext_flags & MKDEXT_NO_INTRA_EMPHASIS) { + if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>') + return 0; + } + if (size > 2 && data[1] != c) { /* whitespace cannot follow an opening emphasis; * strikethrough only takes two characters '~~' */ if (c == '~' || _isspace(data[1]) || (ret = parse_emph1(ob, rndr, data + 1, size - 1, c)) == 0) return 0; @@ -765,11 +774,11 @@ if (!rndr->cb.link || rndr->in_link_body) return 0; link = rndr_newbuf(rndr, BUFFER_SPAN); - if ((link_len = sd_autolink__www(&rewind, link, data, offset, size)) > 0) { + if ((link_len = sd_autolink__www(&rewind, link, data, offset, size, 0)) > 0) { link_url = rndr_newbuf(rndr, BUFFER_SPAN); BUFPUTSL(link_url, "http://"); bufput(link_url, link->data, link->size); ob->size -= rewind; @@ -797,11 +806,11 @@ if (!rndr->cb.autolink || rndr->in_link_body) return 0; link = rndr_newbuf(rndr, BUFFER_SPAN); - if ((link_len = sd_autolink__email(&rewind, link, data, offset, size)) > 0) { + if ((link_len = sd_autolink__email(&rewind, link, data, offset, size, 0)) > 0) { ob->size -= rewind; rndr->cb.autolink(ob, link, MKDA_EMAIL, rndr->opaque); } rndr_popbuf(rndr, BUFFER_SPAN); @@ -817,11 +826,11 @@ if (!rndr->cb.autolink || rndr->in_link_body) return 0; link = rndr_newbuf(rndr, BUFFER_SPAN); - if ((link_len = sd_autolink__url(&rewind, link, data, offset, size)) > 0) { + if ((link_len = sd_autolink__url(&rewind, link, data, offset, size, 0)) > 0) { ob->size -= rewind; rndr->cb.autolink(ob, link, MKDA_NORMAL, rndr->opaque); } rndr_popbuf(rndr, BUFFER_SPAN); @@ -1148,13 +1157,14 @@ } return n >= 3; } -/* check if a line is a code fence; return its size if it is */ +/* check if a line begins with a code fence; return the + * width of the code fence */ static size_t -is_codefence(uint8_t *data, size_t size, struct buf *syntax) +prefix_codefence(uint8_t *data, size_t size) { size_t i = 0, n = 0; uint8_t c; /* skipping initial spaces */ @@ -1175,47 +1185,60 @@ } if (n < 3) return 0; - if (syntax != NULL) { - size_t syn = 0; + return i; +} - while (i < size && data[i] == ' ') - i++; +/* check if a line is a code fence; return its size if it is */ +static size_t +is_codefence(uint8_t *data, size_t size, struct buf *syntax) +{ + size_t i = 0, syn_len = 0; + uint8_t *syn_start; - syntax->data = data + i; + i = prefix_codefence(data, size); + if (i == 0) + return 0; - if (i < size && data[i] == '{') { - i++; syntax->data++; + while (i < size && data[i] == ' ') + i++; - while (i < size && data[i] != '}' && data[i] != '\n') { - syn++; i++; - } + syn_start = data + i; - if (i == size || data[i] != '}') - return 0; + if (i < size && data[i] == '{') { + i++; syn_start++; - /* strip all whitespace at the beginning and the end - * of the {} block */ - while (syn > 0 && _isspace(syntax->data[0])) { - syntax->data++; syn--; - } + while (i < size && data[i] != '}' && data[i] != '\n') { + syn_len++; i++; + } - while (syn > 0 && _isspace(syntax->data[syn - 1])) - syn--; + if (i == size || data[i] != '}') + return 0; - i++; - } else { - while (i < size && !_isspace(data[i])) { - syn++; i++; - } + /* strip all whitespace at the beginning and the end + * of the {} block */ + while (syn_len > 0 && _isspace(syn_start[0])) { + syn_start++; syn_len--; } - syntax->size = syn; + while (syn_len > 0 && _isspace(syn_start[syn_len - 1])) + syn_len--; + + i++; + } else { + while (i < size && !_isspace(data[i])) { + syn_len++; i++; + } } + if (syntax) { + syntax->data = syn_start; + syntax->size = syn_len; + } + while (i < size && data[i] != '\n') { if (!_isspace(data[i])) return 0; i++; @@ -1414,23 +1437,52 @@ struct buf work = { data, 0, 0, 0 }; while (i < size) { for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */; - if (is_empty(data + i, size - i) || (level = is_headerline(data + i, size - i)) != 0) + if (is_empty(data + i, size - i)) break; - if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) { - if (data[i] == '<' && rndr->cb.blockhtml && parse_htmlblock(ob, rndr, data + i, size - i, 0)) { + if ((level = is_headerline(data + i, size - i)) != 0) + break; + + if (is_atxheader(rndr, data + i, size - i) || + is_hrule(data + i, size - i) || + prefix_quote(data + i, size - i)) { + end = i; + break; + } + + /* + * Early termination of a paragraph with the same logic + * as Markdown 1.0.0. If this logic is applied, the + * Markdown 1.0.3 test suite won't pass cleanly + * + * :: If the first character in a new line is not a letter, + * let's check to see if there's some kind of block starting + * here + */ + if ((rndr->ext_flags & MKDEXT_LAX_SPACING) && !isalnum(data[i])) { + if (prefix_oli(data + i, size - i) || + prefix_uli(data + i, size - i)) { end = i; break; } - } - if (is_atxheader(rndr, data + i, size - i) || is_hrule(data + i, size - i)) { - end = i; - break; + /* see if an html block starts here */ + if (data[i] == '<' && rndr->cb.blockhtml && + parse_htmlblock(ob, rndr, data + i, size - i, 0)) { + end = i; + break; + } + + /* see if a code fence starts here */ + if ((rndr->ext_flags & MKDEXT_FENCED_CODE) != 0 && + is_codefence(data + i, size - i, NULL) != 0) { + end = i; + break; + } } i = end; } @@ -1498,13 +1550,14 @@ work = rndr_newbuf(rndr, BUFFER_BLOCK); while (beg < size) { size_t fence_end; + struct buf fence_trail = { 0, 0, 0, 0 }; - fence_end = is_codefence(data + beg, size - beg, NULL); - if (fence_end != 0) { + fence_end = is_codefence(data + beg, size - beg, &fence_trail); + if (fence_end != 0 && fence_trail.size == 0) { beg += fence_end; break; } for (end = beg + 1; end < size && data[end - 1] != '\n'; end++); @@ -1575,12 +1628,11 @@ static size_t parse_listitem(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int *flags) { struct buf *work = 0, *inter = 0; size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i; - int in_empty = 0, has_inside_empty = 0; - size_t has_next_uli, has_next_oli; + int in_empty = 0, has_inside_empty = 0, in_fence = 0; /* keeping track of the first indentation prefix */ while (orgpre < 3 && orgpre < size && data[orgpre] == ' ') orgpre++; @@ -1604,10 +1656,12 @@ bufput(work, data + beg, end - beg); beg = end; /* process the following lines */ while (beg < size) { + size_t has_next_uli = 0, has_next_oli = 0; + end++; while (end < size && data[end - 1] != '\n') end++; @@ -1623,13 +1677,22 @@ while (i < 4 && beg + i < end && data[beg + i] == ' ') i++; pre = i; - has_next_uli = prefix_uli(data + beg + i, end - beg - i); - has_next_oli = prefix_oli(data + beg + i, end - beg - i); + if (rndr->ext_flags & MKDEXT_FENCED_CODE) { + if (is_codefence(data + beg + i, end - beg - i, NULL) != 0) + in_fence = !in_fence; + } + /* Only check for new list items if we are **not** inside + * a fenced code block */ + if (!in_fence) { + has_next_uli = prefix_uli(data + beg + i, end - beg - i); + has_next_oli = prefix_oli(data + beg + i, end - beg - i); + } + /* checking for ul/ol switch */ if (in_empty && ( ((*flags & MKD_LIST_ORDERED) && has_next_uli) || (!(*flags & MKD_LIST_ORDERED) && has_next_oli))){ *flags |= MKD_LI_END; @@ -1645,14 +1708,16 @@ break; /* the same indentation */ if (!sublist) sublist = work->size; } - /* joining only indented stuff after empty lines */ - else if (in_empty && i < 4) { - *flags |= MKD_LI_END; - break; + /* joining only indented stuff after empty lines; + * note that now we only require 1 space of indentation + * to continue a list */ + else if (in_empty && pre == 0) { + *flags |= MKD_LI_END; + break; } else if (in_empty) { bufputc(work, '\n'); has_inside_empty = 1; } @@ -1756,11 +1821,16 @@ /* htmlblock_end • checking end of HTML block : </tag>[ \t]*\n[ \t*]\n */ /* returns the length on match, 0 otherwise */ static size_t -htmlblock_end(const char *tag, size_t tag_len, struct sd_markdown *rndr, uint8_t *data, size_t size) +htmlblock_end_tag( + const char *tag, + size_t tag_len, + struct sd_markdown *rndr, + uint8_t *data, + size_t size) { size_t i, w; /* checking if tag is a match */ if (tag_len + 3 >= size || @@ -1774,29 +1844,64 @@ if (i < size && (w = is_empty(data + i, size - i)) == 0) return 0; /* non-blank after tag */ i += w; w = 0; - if (rndr->ext_flags & MKDEXT_LAX_HTML_BLOCKS) { - if (i < size) - w = is_empty(data + i, size - i); - } else { - if (i < size && (w = is_empty(data + i, size - i)) == 0) - return 0; /* non-blank line after tag line */ - } + if (i < size) + w = is_empty(data + i, size - i); return i + w; } +static size_t +htmlblock_end(const char *curtag, + struct sd_markdown *rndr, + uint8_t *data, + size_t size, + int start_of_line) +{ + size_t tag_size = strlen(curtag); + size_t i = 1, end_tag; + int block_lines = 0; + while (i < size) { + i++; + while (i < size && !(data[i - 1] == '<' && data[i] == '/')) { + if (data[i] == '\n') + block_lines++; + + i++; + } + + /* If we are only looking for unindented tags, skip the tag + * if it doesn't follow a newline. + * + * The only exception to this is if the tag is still on the + * initial line; in that case it still counts as a closing + * tag + */ + if (start_of_line && block_lines > 0 && data[i - 2] != '\n') + continue; + + if (i + 2 + tag_size >= size) + break; + + end_tag = htmlblock_end_tag(curtag, tag_size, rndr, data + i - 1, size - i + 1); + if (end_tag) + return i + end_tag - 1; + } + + return 0; +} + + /* parse_htmlblock • parsing of inline HTML block */ static size_t parse_htmlblock(struct buf *ob, struct sd_markdown *rndr, uint8_t *data, size_t size, int do_render) { - size_t i, j = 0; + size_t i, j = 0, tag_end; const char *curtag = NULL; - int found; struct buf work = { data, 0, 0, 0 }; /* identification of the opening tag */ if (size < 2 || data[0] != '<') return 0; @@ -1853,44 +1958,27 @@ return 0; } /* looking for an unindented matching closing tag */ /* followed by a blank line */ - i = 1; - found = 0; + tag_end = htmlblock_end(curtag, rndr, data, size, 1); /* if not found, trying a second pass looking for indented match */ /* but not if tag is "ins" or "del" (following original Markdown.pl) */ - if (strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) { - size_t tag_size = strlen(curtag); - i = 1; - while (i < size) { - i++; - while (i < size && !(data[i - 1] == '<' && data[i] == '/')) - i++; - - if (i + 2 + tag_size >= size) - break; - - j = htmlblock_end(curtag, tag_size, rndr, data + i - 1, size - i + 1); - - if (j) { - i += j - 1; - found = 1; - break; - } - } + if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) { + tag_end = htmlblock_end(curtag, rndr, data, size, 0); } - if (!found) return 0; + if (!tag_end) + return 0; /* the end of the block has been found */ - work.size = i; + work.size = tag_end; if (do_render && rndr->cb.blockhtml) rndr->cb.blockhtml(ob, &work, rndr->opaque); - return i; + return tag_end; } static void parse_table_row( struct buf *ob, @@ -2458,11 +2546,11 @@ } void sd_version(int *ver_major, int *ver_minor, int *ver_revision) { - *ver_major = UPSKIRT_VER_MAJOR; - *ver_minor = UPSKIRT_VER_MINOR; - *ver_revision = UPSKIRT_VER_REVISION; + *ver_major = SUNDOWN_VER_MAJOR; + *ver_minor = SUNDOWN_VER_MINOR; + *ver_revision = SUNDOWN_VER_REVISION; } /* vim: set filetype=c: */