ext/redcarpet/markdown.c in redcarpet-1.14.2 vs ext/redcarpet/markdown.c in redcarpet-1.15.0
- old
+ new
@@ -20,11 +20,11 @@
#include "markdown.h"
#include "array.h"
#include <assert.h>
#include <string.h>
-#include <strings.h> /* for strncasecmp */
+//#include <strings.h> /* for strncasecmp */
#include <ctype.h>
#include <stdio.h>
#define BUFFER_BLOCK 0
#define BUFFER_SPAN 1
@@ -54,11 +54,13 @@
static size_t char_linebreak(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_codespan(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_escape(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_entity(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_langle_tag(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
-static size_t char_autolink(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
+static size_t char_autolink_url(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
+static size_t char_autolink_email(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
+static size_t char_autolink_www(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
enum markdown_char_t {
MD_CHAR_NONE = 0,
MD_CHAR_EMPHASIS,
@@ -66,11 +68,13 @@
MD_CHAR_LINEBREAK,
MD_CHAR_LINK,
MD_CHAR_LANGLE,
MD_CHAR_ESCAPE,
MD_CHAR_ENTITITY,
- MD_CHAR_AUTOLINK,
+ MD_CHAR_AUTOLINK_URL,
+ MD_CHAR_AUTOLINK_EMAIL,
+ MD_CHAR_AUTOLINK_WWW
};
static char_trigger markdown_char_ptrs[] = {
NULL,
&char_emphasis,
@@ -78,11 +82,13 @@
&char_linebreak,
&char_link,
&char_langle_tag,
&char_escape,
&char_entity,
- &char_autolink,
+ &char_autolink_url,
+ &char_autolink_email,
+ &char_autolink_www,
};
/* render • structure containing one particular render */
struct render {
struct mkd_renderer make;
@@ -140,16 +146,26 @@
{ "ol", 2 },
{ "ul", 2 },
/*10*/ { "del", 3 },
{ "div", 3 },
/*12*/ { "ins", 3 },
+ { "nav", 3 },
{ "pre", 3 },
+ { "abbr", 4 },
{ "form", 4 },
{ "math", 4 },
+ { "aside", 5 },
{ "table", 5 },
+ { "canvas", 6 },
+ { "figure", 6 },
+ { "footer", 6 },
+ { "header", 6 },
+ { "hgroup", 6 },
{ "iframe", 6 },
{ "script", 6 },
+ { "article", 7 },
+ { "section", 7 },
{ "fieldset", 8 },
{ "noscript", 8 },
{ "blockquote", 10 }
};
@@ -222,11 +238,11 @@
static int
cmp_html_tag(const void *a, const void *b)
{
const struct html_tag *hta = a;
const struct html_tag *htb = b;
- if (hta->size != htb->size) return (int)((ssize_t)hta->size - (ssize_t)htb->size);
+ if (hta->size != htb->size) return (int)(hta->size - htb->size);
return strncasecmp(hta->text, htb->text, hta->size);
}
/* find_block_tag • returns the current block tag */
@@ -304,11 +320,11 @@
return 0;
/* scheme test */
*autolink = MKDA_NOT_AUTOLINK;
- /* try to find the beggining of an URI */
+ /* try to find the beginning of a URI */
while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
i++;
if (i > 1 && data[i] == '@') {
if ((j = is_mail_autolink(data + i, size - i)) != 0) {
@@ -355,11 +371,11 @@
size_t i = 0, end = 0;
char action = 0;
struct buf work = { 0, 0, 0, 0, 0 };
if (rndr->work_bufs[BUFFER_SPAN].size +
- rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
+ rndr->work_bufs[BUFFER_BLOCK].size > (int)rndr->max_nesting)
return;
while (i < size) {
/* copying inactive chars into the output */
while (end < size && (action = rndr->active_char[(unsigned char)data[end]]) == 0) {
@@ -668,11 +684,11 @@
return 2;
}
/* char_entity • '&' escaped when it doesn't belong to an entity */
-/* valid entities are assumed to be anything mathing &#?[A-Za-z0-9]+; */
+/* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
static size_t
char_entity(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
{
size_t end = 1;
struct buf work;
@@ -723,36 +739,48 @@
if (!ret) return 0;
else return end;
}
static size_t
-char_autolink(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
+autolink_delim(char *data, size_t link_end, size_t offset, size_t size)
{
- struct buf work = { data, 0, 0, 0, 0 };
char copen = 0;
- size_t link_end;
- if (offset > 0) {
- if (!isspace(data[-1]) && !ispunct(data[-1]))
- return 0;
+ /* See if the link finishes with a punctuation sign that can be skipped. */
+ switch (data[link_end - 1]) {
+ case '?':
+ case '!':
+ case '.':
+ case ',':
+ link_end--;
+ break;
+
+ case ';':
+ {
+ size_t new_end = link_end - 2;
+
+ while (new_end > 0 && isalpha(data[new_end]))
+ new_end--;
+
+ if (new_end < link_end - 2 && data[new_end] == '&')
+ link_end = new_end;
+ else
+ link_end--;
+
+ break;
}
- if (!is_safe_link(data, size))
- return 0;
+ case '>':
+ while (link_end > 0 && data[link_end] != '<')
+ link_end--;
- link_end = 0;
- while (link_end < size && !isspace(data[link_end]))
- link_end++;
+ if (link_end == 0)
+ return 0;
- /* Skip punctuation at the end of the link */
- if ((data[link_end - 1] == '.' ||
- data[link_end - 1] == ',' ||
- data[link_end - 1] == ';') &&
- data[link_end - 2] != '\\')
- link_end--;
+ break;
+ }
- /* See if the link finishes with a punctuation sign that can be closed. */
switch (data[link_end - 1]) {
case '"': copen = '"'; break;
case '\'': copen = '\''; break;
case ')': copen = '('; break;
case ']': copen = '['; break;
@@ -760,13 +788,12 @@
}
if (copen != 0) {
char *buf_start = data - offset;
char *buf_end = data + link_end - 2;
+ char *open_delim = NULL;
- size_t open_delim = 1;
-
/* Try to close the final punctuation sign in this same line;
* if we managed to close it outside of the URL, that means that it's
* not part of the URL. If it closes inside the URL, that means it
* is part of the URL.
*
@@ -783,35 +810,165 @@
*
* (foo http://www.pokemon.com/Pikachu_(Electric)) bar
* => foo http://www.pokemon.com/Pikachu_(Electric)
*/
- while (buf_end >= buf_start && *buf_end != '\n' && open_delim) {
- if (*buf_end == data[link_end - 1])
- open_delim++;
-
+ while (buf_end >= buf_start && *buf_end != '\n') {
if (*buf_end == copen)
- open_delim--;
+ open_delim = buf_end;
buf_end--;
}
- if (open_delim == 0)
+ if (open_delim != NULL && open_delim < data)
link_end--;
}
+ return link_end;
+}
+
+
+static size_t
+char_autolink_www(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
+{
+ struct buf work = { 0, 0, 0, 0, 0 };
+ size_t link_end;
+ int np = 0;
+
+ if (offset > 0 && !ispunct(data[-1]) && !isspace(data[-1]))
+ return 0;
+
+ if (size < 4 || memcmp(data, "www.", STRLEN("www.")) != 0)
+ return 0;
+
+ link_end = 0;
+ while (link_end < size && !isspace(data[link_end])) {
+ if (data[link_end] == '.')
+ np++;
+
+ link_end++;
+ }
+
+ if (np < 2)
+ return 0;
+
+ link_end = autolink_delim(data, link_end, offset, size);
+
+ if (link_end == 0)
+ return 0;
+
work.size = link_end;
+ work.data = data;
+ if (rndr->make.link) {
+ struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
+ BUFPUTSL(u_link, "http://");
+ unscape_text(u_link, &work);
+
+ rndr->make.link(ob, u_link, NULL, &work, rndr->make.opaque);
+ rndr_popbuf(rndr, BUFFER_SPAN);
+ }
+
+ return link_end;
+}
+
+static size_t
+char_autolink_email(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
+{
+ struct buf work = { 0, 0, 0, 0, 0 };
+ size_t link_end, rewind;
+ int nb = 0, np = 0;
+
+ for (rewind = 0; rewind < offset; ++rewind) {
+ char c = data[-rewind - 1];
+
+ if (isalnum(c))
+ continue;
+
+ if (strchr(".!#$%&*+-/=?^_`|~", c) != NULL)
+ continue;
+
+ break;
+ }
+
+ if (rewind == 0)
+ return 0;
+
+ for (link_end = 0; link_end < size; ++link_end) {
+ char c = data[link_end];
+
+ if (isalnum(c))
+ continue;
+
+ if (c == '@')
+ nb++;
+ else if (c == '.')
+ np++;
+ else if (c != '-' && c != '_')
+ break;
+ }
+
+ if (link_end < 2 || nb != 1 || np == 0)
+ return 0;
+
+ link_end = autolink_delim(data, link_end, offset, size);
+
+ if (link_end == 0)
+ return 0;
+
+ work.size = link_end + rewind;
+ work.data = data - rewind;
+
if (rndr->make.autolink) {
struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
unscape_text(u_link, &work);
+ ob->size -= rewind;
+ rndr->make.autolink(ob, u_link, MKDA_EMAIL, rndr->make.opaque);
+ rndr_popbuf(rndr, BUFFER_SPAN);
+ }
+
+ return link_end;
+}
+
+static size_t
+char_autolink_url(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
+{
+ struct buf work = { 0, 0, 0, 0, 0 };
+ size_t link_end, rewind = 0;
+
+ if (size < 4 || data[1] != '/' || data[2] != '/')
+ return 0;
+
+ while (rewind < offset && isalpha(data[-rewind - 1]))
+ rewind++;
+
+ if (!is_safe_link(data - rewind, size + rewind))
+ return 0;
+
+ link_end = 0;
+ while (link_end < size && !isspace(data[link_end]))
+ link_end++;
+
+ link_end = autolink_delim(data, link_end, offset, size);
+
+ if (link_end == 0)
+ return 0;
+
+ work.size = link_end + rewind;
+ work.data = data - rewind;
+
+ if (rndr->make.autolink) {
+ struct buf *u_link = rndr_newbuf(rndr, BUFFER_SPAN);
+ unscape_text(u_link, &work);
+
+ ob->size -= rewind;
rndr->make.autolink(ob, u_link, MKDA_NORMAL, rndr->make.opaque);
rndr_popbuf(rndr, BUFFER_SPAN);
}
- return work.size;
+ return link_end;
}
/* char_link • '[': parsing a link or an image */
static size_t
char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
@@ -1122,11 +1279,11 @@
}
if (i == size || data[i] != '}')
return 0;
- /* strip all whitespace at the beggining and the end
+ /* strip all whitespace at the beginning and the end
* of the {} block */
while (syn > 0 && isspace(syntax->data[0])) {
syntax->data++; syn--;
}
@@ -1253,11 +1410,11 @@
/* parse_block • parsing of one block (forward declaration; the function returns void) */
static void parse_block(struct buf *ob, struct render *rndr,
char *data, size_t size);
-/* parse_blockquote • hanldes parsing of a blockquote fragment */
+/* parse_blockquote • handles parsing of a blockquote fragment */
static size_t
parse_blockquote(struct buf *ob, struct render *rndr, char *data, size_t size)
{
size_t beg, end = 0, pre, work_size = 0;
char *work_data = 0;
@@ -1298,11 +1455,11 @@
}
static size_t
parse_htmlblock(struct buf *ob, struct render *rndr, char *data, size_t size, int do_render);
-/* parse_blockquote • hanldes parsing of a regular paragraph */
+/* parse_paragraph • handles parsing of a regular paragraph */
static size_t
parse_paragraph(struct buf *ob, struct render *rndr, char *data, size_t size)
{
size_t i = 0, end = 0;
int level = 0;
@@ -1378,11 +1535,11 @@
}
return end;
}
-/* parse_fencedcode • hanldes parsing of a block-level code fragment */
+/* parse_fencedcode • handles parsing of a block-level code fragment */
static size_t
parse_fencedcode(struct buf *ob, struct render *rndr, char *data, size_t size)
{
size_t beg, end;
struct buf *work = 0;
@@ -1472,11 +1629,11 @@
{
struct buf *work = 0, *inter = 0;
size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i;
int in_empty = 0, has_inside_empty = 0;
- /* keeping book of the first indentation prefix */
+ /* keeping track of the first indentation prefix */
while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
orgpre++;
beg = prefix_uli(data, size);
if (!beg)
@@ -1648,11 +1805,11 @@
{
size_t i, w;
/* assuming data[0] == '<' && data[1] == '/' already tested */
- /* checking tag is a match */
+ /* checking if tag is a match */
if (tag->size + 3 >= size
|| strncasecmp(data + 2, tag->text, tag->size)
|| data[tag->size + 2] != '>')
return 0;
@@ -1947,11 +2104,11 @@
size_t beg, end, i;
char *txt_data;
beg = 0;
if (rndr->work_bufs[BUFFER_SPAN].size +
- rndr->work_bufs[BUFFER_BLOCK].size > rndr->max_nesting)
+ rndr->work_bufs[BUFFER_BLOCK].size > (int)rndr->max_nesting)
return;
while (beg < size) {
txt_data = data + beg;
end = size - beg;
@@ -2185,17 +2342,12 @@
rndr.active_char['<'] = MD_CHAR_LANGLE;
rndr.active_char['\\'] = MD_CHAR_ESCAPE;
rndr.active_char['&'] = MD_CHAR_ENTITITY;
if (extensions & MKDEXT_AUTOLINK) {
- rndr.active_char['h'] = MD_CHAR_AUTOLINK; // http, https
- rndr.active_char['H'] = MD_CHAR_AUTOLINK;
-
- rndr.active_char['f'] = MD_CHAR_AUTOLINK; // ftp
- rndr.active_char['F'] = MD_CHAR_AUTOLINK;
-
- rndr.active_char['m'] = MD_CHAR_AUTOLINK; // mailto
- rndr.active_char['M'] = MD_CHAR_AUTOLINK;
+ rndr.active_char[':'] = MD_CHAR_AUTOLINK_URL;
+ rndr.active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
+ rndr.active_char['w'] = MD_CHAR_AUTOLINK_WWW;
}
/* Extension data */
rndr.ext_flags = extensions;
rndr.max_nesting = 16;