ext/redcarpet/markdown.c in redcarpet-1.17.2 vs ext/redcarpet/markdown.c in redcarpet-2.0.0b
- old
+ new
@@ -58,10 +58,11 @@
static size_t char_langle_tag(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_autolink_url(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_autolink_email(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_autolink_www(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
static size_t char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
+static size_t char_superscript(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size);
enum markdown_char_t {
MD_CHAR_NONE = 0,
MD_CHAR_EMPHASIS,
MD_CHAR_CODESPAN,
@@ -70,11 +71,12 @@
MD_CHAR_LANGLE,
MD_CHAR_ESCAPE,
MD_CHAR_ENTITITY,
MD_CHAR_AUTOLINK_URL,
MD_CHAR_AUTOLINK_EMAIL,
- MD_CHAR_AUTOLINK_WWW
+ MD_CHAR_AUTOLINK_WWW,
+ MD_CHAR_SUPERSCRIPT,
};
static char_trigger markdown_char_ptrs[] = {
NULL,
&char_emphasis,
@@ -85,10 +87,11 @@
&char_escape,
&char_entity,
&char_autolink_url,
&char_autolink_email,
&char_autolink_www,
+ &char_superscript,
};
/* render • structure containing one particular render */
struct render {
struct mkd_renderer make;
@@ -143,17 +146,18 @@
{ "h4", 2 },
{ "h5", 2 },
{ "h6", 2 },
{ "ol", 2 },
{ "ul", 2 },
-/*10*/ { "del", 3 },
+ { "del", 3 }, /* 10 */
{ "div", 3 },
-/*12*/ { "ins", 3 },
+ { "ins", 3 }, /* 12 */
{ "pre", 3 },
{ "form", 4 },
{ "math", 4 },
{ "table", 5 },
+ { "figure", 6 },
{ "iframe", 6 },
{ "script", 6 },
{ "fieldset", 8 },
{ "noscript", 8 },
{ "blockquote", 10 }
@@ -222,11 +226,11 @@
/* looking for the word end */
while (i < size && ((data[i] >= '0' && data[i] <= '9')
|| (data[i] >= 'A' && data[i] <= 'Z')
|| (data[i] >= 'a' && data[i] <= 'z')))
- i += 1;
+ i++;
if (i >= size) return 0;
/* binary search of the tag */
key.text = data;
key.size = i;
@@ -315,21 +319,21 @@
while (i < size) {
if (data[i] == '\\') i += 2;
else if (data[i] == '>' || data[i] == '\'' ||
data[i] == '"' || isspace(data[i])) break;
- else i += 1;
+ else i++;
}
if (i >= size) return 0;
if (i > j && data[i] == '>') return i + 1;
/* one of the forbidden chars has been found */
*autolink = MKDA_NOT_AUTOLINK;
}
/* looking for something looking like a tag end */
- while (i < size && data[i] != '>') i += 1;
+ while (i < size && data[i] != '>') i++;
if (i >= size) return 0;
return i + 1;
}
/* parse_inline • parses inline markdown elements */
@@ -377,51 +381,76 @@
find_emph_char(char *data, size_t size, char c)
{
size_t i = 1;
while (i < size) {
- while (i < size && data[i] != c
- && data[i] != '`' && data[i] != '[')
- i += 1;
- if (data[i] == c) return i;
+ while (i < size && data[i] != c && data[i] != '`' && data[i] != '[')
+ i++;
+ if (i == size)
+ return 0;
+
+ if (data[i] == c)
+ return i;
+
/* not counting escaped chars */
- if (i && data[i - 1] == '\\') { i += 1; continue; }
+ if (i && data[i - 1] == '\\') {
+ i++; continue;
+ }
/* skipping a code span */
if (data[i] == '`') {
size_t tmp_i = 0;
- i += 1;
+
+ i++;
while (i < size && data[i] != '`') {
if (!tmp_i && data[i] == c) tmp_i = i;
- i += 1; }
- if (i >= size) return tmp_i;
- i += 1; }
+ i++;
+ }
+ if (i >= size)
+ return tmp_i;
+
+ i++;
+ }
/* skipping a link */
else if (data[i] == '[') {
size_t tmp_i = 0;
char cc;
- i += 1;
+
+ i++;
while (i < size && data[i] != ']') {
if (!tmp_i && data[i] == c) tmp_i = i;
- i += 1; }
- i += 1;
- while (i < size && (data[i] == ' '
- || data[i] == '\t' || data[i] == '\n'))
- i += 1;
- if (i >= size) return tmp_i;
+ i++;
+ }
+
+ i++;
+ while (i < size && (data[i] == ' ' || data[i] == '\t' || data[i] == '\n'))
+ i++;
+
+ if (i >= size)
+ return tmp_i;
+
if (data[i] != '[' && data[i] != '(') { /* not a link*/
if (tmp_i) return tmp_i;
- else continue; }
+ else continue;
+ }
+
cc = data[i];
- i += 1;
+ i++;
while (i < size && data[i] != cc) {
if (!tmp_i && data[i] == c) tmp_i = i;
- i += 1; }
- if (i >= size) return tmp_i;
- i += 1; } }
+ i++;
+ }
+
+ if (i >= size)
+ return tmp_i;
+
+ i++;
+ }
+ }
+
return 0;
}
/* parse_emph1 • parsing single emphasis */
/* closed by a symbol not preceded by whitespace and not followed by symbol */
@@ -442,11 +471,11 @@
if (!len) return 0;
i += len;
if (i >= size) return 0;
if (i + 1 < size && data[i + 1] == c) {
- i += 1;
+ i++;
continue;
}
if (data[i] == c && !isspace(data[i - 1])) {
@@ -666,11 +695,11 @@
while (end < size && isalnum(data[end]))
end++;
if (end < size && data[end] == ';')
- end += 1; /* real entity */
+ end++; /* real entity */
else
return 0; /* lone '&' */
if (rndr->make.entity) {
work.data = data;
@@ -717,11 +746,11 @@
if (!rndr->make.link)
return 0;
link = rndr_newbuf(rndr, BUFFER_SPAN);
- if ((link_len = ups_autolink__www(&rewind, link, data, offset, size)) > 0) {
+ if ((link_len = sd_autolink__www(&rewind, link, data, offset, size)) > 0) {
link_url = rndr_newbuf(rndr, BUFFER_SPAN);
BUFPUTSL(link_url, "http://");
bufput(link_url, link->data, link->size);
ob->size -= rewind;
@@ -742,11 +771,11 @@
if (!rndr->make.autolink)
return 0;
link = rndr_newbuf(rndr, BUFFER_SPAN);
- if ((link_len = ups_autolink__email(&rewind, link, data, offset, size)) > 0) {
+ if ((link_len = sd_autolink__email(&rewind, link, data, offset, size)) > 0) {
ob->size -= rewind;
rndr->make.autolink(ob, link, MKDA_EMAIL, rndr->make.opaque);
}
rndr_popbuf(rndr, BUFFER_SPAN);
@@ -762,11 +791,11 @@
if (!rndr->make.autolink)
return 0;
link = rndr_newbuf(rndr, BUFFER_SPAN);
- if ((link_len = ups_autolink__url(&rewind, link, data, offset, size)) > 0) {
+ if ((link_len = sd_autolink__url(&rewind, link, data, offset, size)) > 0) {
ob->size -= rewind;
rndr->make.autolink(ob, link, MKDA_NORMAL, rndr->make.opaque);
}
rndr_popbuf(rndr, BUFFER_SPAN);
@@ -789,11 +818,11 @@
/* checking whether the correct renderer exists */
if ((is_img && !rndr->make.image) || (!is_img && !rndr->make.link))
goto cleanup;
/* looking for the matching closing bracket */
- for (level = 1; i < size; i += 1) {
+ for (level = 1; i < size; i++) {
if (data[i] == '\n')
text_has_nl = 1;
else if (data[i - 1] == '\\')
continue;
@@ -810,32 +839,32 @@
if (i >= size)
goto cleanup;
txt_e = i;
- i += 1;
+ i++;
/* skip any amount of whitespace or newline */
/* (this is much more lax than the original markdown syntax) */
while (i < size && isspace(data[i]))
i++;
/* inline style link */
if (i < size && data[i] == '(') {
/* skipping initial whitespace */
- i += 1;
+ i++;
while (i < size && isspace(data[i]))
i++;
link_b = i;
/* looking for link end: ' " ) */
while (i < size) {
if (data[i] == '\\') i += 2;
else if (data[i] == ')' || data[i] == '\'' || data[i] == '"') break;
- else i += 1;
+ else i++;
}
if (i >= size) goto cleanup;
link_e = i;
@@ -845,11 +874,11 @@
title_b = i;
while (i < size) {
if (data[i] == '\\') i += 2;
else if (data[i] == ')') break;
- else i += 1;
+ else i++;
}
if (i >= size) goto cleanup;
/* skipping whitespaces after title */
@@ -890,11 +919,11 @@
else if (i < size && data[i] == '[') {
struct buf id = { 0, 0, 0, 0, 0 };
struct link_ref *lr;
/* looking for the id */
- i += 1;
+ i++;
link_b = i;
while (i < size && data[i] != ']') i++;
if (i >= size) goto cleanup;
link_e = i;
@@ -926,11 +955,11 @@
if (!lr) goto cleanup;
/* keeping link and title from link_ref */
link = lr->link;
title = lr->title;
- i += 1;
+ i++;
}
/* shortcut reference style link */
else {
struct buf id = { 0, 0, 0, 0, 0 };
@@ -993,22 +1022,58 @@
cleanup:
rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
return ret ? i : 0;
}
+/* char_superscript • '^' superscript span (enabled by MKDEXT_SUPERSCRIPT) */
+/*
+ * data[0] is assumed to be the '^' that triggered the call; scanning starts
+ * at data[1]. Two forms are recognised:
+ *   ^(text)  everything up to the first unescaped ')' is the superscript
+ *   ^text    everything up to the next whitespace (or the end of the span)
+ *
+ * The selected text is run through parse_inline() into a scratch span buffer
+ * and handed to the renderer's superscript callback together with ob.
+ *
+ * Returns the number of bytes consumed, counting the '^' and, for the
+ * parenthesised form, both parentheses. Returns 0 when no superscript
+ * callback is installed, when fewer than two bytes are available, when a
+ * '(' is never closed, or when the bare form has no text. An empty "^()"
+ * is consumed (returns 3) without invoking the callback.
+ *
+ * offset is not used; it is only present to match the char_trigger
+ * signature shared by the other char_* handlers.
+ */
+static size_t
+char_superscript(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size)
+{
+	size_t sup_start, sup_len;
+	struct buf *sup;
+
+	if (!rndr->make.superscript)
+		return 0;
+
+	if (size < 2)
+		return 0;
+
+	if (data[1] == '(') {
+		sup_start = sup_len = 2;
+
+		/* a backslash escapes the byte after it, so "\)" does not close
+		 * the span and an ordinary escaped byte is not mistaken for the
+		 * closing parenthesis */
+		while (sup_len < size && data[sup_len] != ')')
+			sup_len += (data[sup_len] == '\\') ? 2 : 1;
+
+		/* skipping past a trailing backslash can overshoot by one,
+		 * hence >= rather than == */
+		if (sup_len >= size)
+			return 0;
+	} else {
+		sup_start = sup_len = 1;
+
+		/* <ctype.h> functions require a value representable as
+		 * unsigned char; plain char may be negative for bytes >= 0x80 */
+		while (sup_len < size && !isspace((unsigned char)data[sup_len]))
+			sup_len++;
+	}
+
+	/* nothing between the delimiters: swallow "^()" but leave a lone '^' */
+	if (sup_len == sup_start)
+		return (sup_start == 2) ? 3 : 0;
+
+	sup = rndr_newbuf(rndr, BUFFER_SPAN);
+	parse_inline(sup, rndr, data + sup_start, sup_len - sup_start);
+	rndr->make.superscript(ob, sup, rndr->make.opaque);
+	rndr_popbuf(rndr, BUFFER_SPAN);
+
+	/* the parenthesised form also consumes the closing ')' */
+	return (sup_start == 2) ? sup_len + 1 : sup_len;
+}
+
/*********************************
* BLOCK-LEVEL PARSING FUNCTIONS *
*********************************/
/* is_empty • returns the line length when it is empty, 0 otherwise
 *
 * A line counts as empty when every byte before its '\n' (or before the end
 * of the buffer when no '\n' is present) is a space or a tab.
 *
 * data: start of the line; size: number of bytes readable from data.
 * Returns 0 as soon as any other byte is met. Otherwise returns i + 1, that
 * is the length of the blank run plus one for the '\n'. When the scan stops
 * because i reached size (no '\n' found) the result is therefore size + 1.
 *
 * The only difference between the two revisions is `i += 1` becoming `i++`. */
static size_t
is_empty(char *data, size_t size)
{
size_t i;
- for (i = 0; i < size && data[i] != '\n'; i += 1)
+ for (i = 0; i < size && data[i] != '\n'; i++)
if (data[i] != ' ' && data[i] != '\t') return 0;
return i + 1;
}
/* is_hrule • returns whether a line is a horizontal rule */
@@ -1018,26 +1083,26 @@
size_t i = 0, n = 0;
char c;
/* skipping initial spaces */
if (size < 3) return 0;
- if (data[0] == ' ') { i += 1;
- if (data[1] == ' ') { i += 1;
- if (data[2] == ' ') { i += 1; } } }
+ if (data[0] == ' ') { i++;
+ if (data[1] == ' ') { i++;
+ if (data[2] == ' ') { i++; } } }
/* looking at the hrule char */
if (i + 2 >= size
|| (data[i] != '*' && data[i] != '-' && data[i] != '_'))
return 0;
c = data[i];
/* the whole line must be the char or whitespace */
while (i < size && data[i] != '\n') {
- if (data[i] == c) n += 1;
+ if (data[i] == c) n++;
else if (data[i] != ' ' && data[i] != '\t')
return 0;
- i += 1; }
+ i++; }
return n >= 3;
}
/* check if a line is a code fence; return its size if it is */
@@ -1047,13 +1112,13 @@
size_t i = 0, n = 0;
char c;
/* skipping initial spaces */
if (size < 3) return 0;
- if (data[0] == ' ') { i += 1;
- if (data[1] == ' ') { i += 1;
- if (data[2] == ' ') { i += 1; } } }
+ if (data[0] == ' ') { i++;
+ if (data[1] == ' ') { i++;
+ if (data[2] == ' ') { i++; } } }
/* looking at the code fence char */
if (i + 2 >= size || !(data[i] == '~' || data[i] == '`'))
return 0;
@@ -1140,31 +1205,31 @@
{
size_t i = 0;
/* test of level 1 header */
if (data[i] == '=') {
- for (i = 1; i < size && data[i] == '='; i += 1);
- while (i < size && (data[i] == ' ' || data[i] == '\t')) i += 1;
+ for (i = 1; i < size && data[i] == '='; i++);
+ while (i < size && (data[i] == ' ' || data[i] == '\t')) i++;
return (i >= size || data[i] == '\n') ? 1 : 0; }
/* test of level 2 header */
if (data[i] == '-') {
- for (i = 1; i < size && data[i] == '-'; i += 1);
- while (i < size && (data[i] == ' ' || data[i] == '\t')) i += 1;
+ for (i = 1; i < size && data[i] == '-'; i++);
+ while (i < size && (data[i] == ' ' || data[i] == '\t')) i++;
return (i >= size || data[i] == '\n') ? 2 : 0; }
return 0;
}
/* prefix_quote • returns blockquote prefix length */
static size_t
prefix_quote(char *data, size_t size)
{
size_t i = 0;
- if (i < size && data[i] == ' ') i += 1;
- if (i < size && data[i] == ' ') i += 1;
- if (i < size && data[i] == ' ') i += 1;
+ if (i < size && data[i] == ' ') i++;
+ if (i < size && data[i] == ' ') i++;
+ if (i < size && data[i] == ' ') i++;
if (i < size && data[i] == '>') {
if (i + 1 < size && (data[i + 1] == ' ' || data[i+1] == '\t'))
return i + 2;
else return i + 1; }
else return 0;
@@ -1183,28 +1248,28 @@
/* prefix_oli • returns ordered list item prefix
 *
 * Recognises, at the start of data: up to three spaces, one or more decimal
 * digits, a '.', and then a single space or tab.
 *
 * data: start of the line; size: number of bytes readable from data.
 * Returns the offset of the first byte after that space/tab (i + 2, counted
 * from the '.'), or 0 when the line does not start with such a prefix.
 *
 * The only difference between the two revisions is `i += 1` becoming `i++`. */
static size_t
prefix_oli(char *data, size_t size)
{
size_t i = 0;
/* skip at most three leading spaces */
- if (i < size && data[i] == ' ') i += 1;
- if (i < size && data[i] == ' ') i += 1;
- if (i < size && data[i] == ' ') i += 1;
+ if (i < size && data[i] == ' ') i++;
+ if (i < size && data[i] == ' ') i++;
+ if (i < size && data[i] == ' ') i++;
/* at least one digit is required, then the rest of the number is skipped */
if (i >= size || data[i] < '0' || data[i] > '9') return 0;
- while (i < size && data[i] >= '0' && data[i] <= '9') i += 1;
+ while (i < size && data[i] >= '0' && data[i] <= '9') i++;
/* the number must be followed by '.' and then a space or a tab;
 * i + 1 >= size also rejects a '.' that is the last readable byte */
if (i + 1 >= size || data[i] != '.'
|| (data[i + 1] != ' ' && data[i + 1] != '\t')) return 0;
return i + 2;
}
/* prefix_uli • returns unordered list item prefix */
static size_t
prefix_uli(char *data, size_t size)
{
size_t i = 0;
- if (i < size && data[i] == ' ') i += 1;
- if (i < size && data[i] == ' ') i += 1;
- if (i < size && data[i] == ' ') i += 1;
+ if (i < size && data[i] == ' ') i++;
+ if (i < size && data[i] == ' ') i++;
+ if (i < size && data[i] == ' ') i++;
if (i + 1 >= size
|| (data[i] != '*' && data[i] != '+' && data[i] != '-')
|| (data[i + 1] != ' ' && data[i + 1] != '\t'))
return 0;
return i + 2;
@@ -1361,11 +1426,11 @@
if (fence_end != 0) {
beg += fence_end;
break;
}
- for (end = beg + 1; end < size && data[end - 1] != '\n'; end += 1);
+ for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
if (beg < end) {
/* verbatim copy to the working buffer,
escaping entities */
if (is_empty(data + beg, end - beg))
@@ -1675,14 +1740,14 @@
/* HR, which is the only self-closing block tag considered */
if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
i = 3;
while (i < size && data[i] != '>')
- i += 1;
+ i++;
if (i + 1 < size) {
- i += 1;
+ i++;
j = is_empty(data + i, size - i);
if (j) {
work.size = i + j;
if (do_render && rndr->make.blockhtml)
rndr->make.blockhtml(ob, &work, rndr->make.opaque);
@@ -1989,38 +2054,38 @@
if (data[beg + 3] == ' ') return 0; } } }
i += beg;
/* id part: anything but a newline between brackets */
if (data[i] != '[') return 0;
- i += 1;
+ i++;
id_offset = i;
while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
- i += 1;
+ i++;
if (i >= end || data[i] != ']') return 0;
id_end = i;
/* spacer: colon (space | tab)* newline? (space | tab)* */
- i += 1;
+ i++;
if (i >= end || data[i] != ':') return 0;
- i += 1;
- while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
+ i++;
+ while (i < end && (data[i] == ' ' || data[i] == '\t')) i++;
if (i < end && (data[i] == '\n' || data[i] == '\r')) {
- i += 1;
- if (i < end && data[i] == '\r' && data[i - 1] == '\n') i += 1; }
- while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
+ i++;
+ if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
+ while (i < end && (data[i] == ' ' || data[i] == '\t')) i++;
if (i >= end) return 0;
/* link: whitespace-free sequence, optionally between angle brackets */
- if (data[i] == '<') i += 1;
+ if (data[i] == '<') i++;
link_offset = i;
while (i < end && data[i] != ' ' && data[i] != '\t'
- && data[i] != '\n' && data[i] != '\r') i += 1;
+ && data[i] != '\n' && data[i] != '\r') i++;
if (data[i - 1] == '>') link_end = i - 1;
else link_end = i;
/* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
- while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1;
+ while (i < end && (data[i] == ' ' || data[i] == '\t')) i++;
if (i < end && data[i] != '\n' && data[i] != '\r'
&& data[i] != '\'' && data[i] != '"' && data[i] != '(')
return 0;
line_end = 0;
/* computing end-of-line */
@@ -2029,21 +2094,21 @@
line_end = i + 1;
/* optional (space|tab)* spacer after a newline */
if (line_end) {
i = line_end + 1;
- while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; }
+ while (i < end && (data[i] == ' ' || data[i] == '\t')) i++; }
/* optional title: any non-newline sequence enclosed in '"()
alone on its line */
title_offset = title_end = 0;
if (i + 1 < end
&& (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
- i += 1;
+ i++;
title_offset = i;
/* looking for EOL */
- while (i < end && data[i] != '\n' && data[i] != '\r') i += 1;
+ while (i < end && data[i] != '\n' && data[i] != '\r') i++;
if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
title_end = i + 1;
else title_end = i;
/* stepping back */
i -= 1;
@@ -2100,11 +2165,11 @@
* EXPORTED FUNCTIONS *
**********************/
/* markdown • parses the input buffer and renders it into the output buffer */
void
-ups_markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndrer, unsigned int extensions) {
+sd_markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndrer, unsigned int extensions) {
struct link_ref *lr;
struct buf *text;
size_t i, beg, end;
struct render rndr;
@@ -2122,11 +2187,11 @@
memcpy(&rndr.make, rndrer, sizeof(struct mkd_renderer));
arr_init(&rndr.refs, sizeof (struct link_ref));
parr_init(&rndr.work_bufs[BUFFER_BLOCK]);
parr_init(&rndr.work_bufs[BUFFER_SPAN]);
- for (i = 0; i < 256; i += 1)
+ for (i = 0; i < 256; i++)
rndr.active_char[i] = 0;
if (rndr.make.emphasis || rndr.make.double_emphasis || rndr.make.triple_emphasis) {
rndr.active_char['*'] = MD_CHAR_EMPHASIS;
rndr.active_char['_'] = MD_CHAR_EMPHASIS;
@@ -2151,10 +2216,13 @@
rndr.active_char[':'] = MD_CHAR_AUTOLINK_URL;
rndr.active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
rndr.active_char['w'] = MD_CHAR_AUTOLINK_WWW;
}
+ if (extensions & MKDEXT_SUPERSCRIPT)
+ rndr.active_char['^'] = MD_CHAR_SUPERSCRIPT;
+
/* Extension data */
rndr.ext_flags = extensions;
rndr.max_nesting = 16;
/* first pass: looking for references, copying everything else */
@@ -2163,21 +2231,21 @@
if (is_ref(ib->data, beg, ib->size, &end, &rndr.refs))
beg = end;
else { /* skipping to the next line */
end = beg;
while (end < ib->size && ib->data[end] != '\n' && ib->data[end] != '\r')
- end += 1;
+ end++;
/* adding the line body if present */
if (end > beg)
expand_tabs(text, ib->data + beg, end - beg);
while (end < ib->size && (ib->data[end] == '\n' || ib->data[end] == '\r')) {
/* add one \n per newline */
if (ib->data[end] == '\n' || (end + 1 < ib->size && ib->data[end + 1] != '\n'))
bufputc(text, '\n');
- end += 1;
+ end++;
}
beg = end;
}
@@ -2201,11 +2269,11 @@
rndr.make.doc_footer(ob, rndr.make.opaque);
/* clean-up */
bufrelease(text);
lr = rndr.refs.base;
- for (i = 0; i < (size_t)rndr.refs.size; i += 1) {
+ for (i = 0; i < (size_t)rndr.refs.size; i++) {
bufrelease(lr[i].id);
bufrelease(lr[i].link);
bufrelease(lr[i].title);
}
@@ -2223,10 +2291,10 @@
parr_free(&rndr.work_bufs[BUFFER_SPAN]);
parr_free(&rndr.work_bufs[BUFFER_BLOCK]);
}
/* sd_version • reports the library version (ups_version in the old revision)
 *
 * Stores UPSKIRT_VER_MAJOR, UPSKIRT_VER_MINOR and UPSKIRT_VER_REVISION
 * through ver_major, ver_minor and ver_revision respectively. Each pointer
 * is dereferenced unconditionally, so none of them may be NULL.
 *
 * Only the function name changes between the two revisions; the version
 * macros keep their UPSKIRT_ prefix. */
void
-ups_version(int *ver_major, int *ver_minor, int *ver_revision)
+sd_version(int *ver_major, int *ver_minor, int *ver_revision)
{
*ver_major = UPSKIRT_VER_MAJOR;
*ver_minor = UPSKIRT_VER_MINOR;
*ver_revision = UPSKIRT_VER_REVISION;
}