ext/redcarpet/markdown.c in redcarpet-1.17.2 vs ext/redcarpet/markdown.c in redcarpet-2.0.0b

- old
+ new

@@ -58,10 +58,11 @@ static size_t char_langle_tag(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size); static size_t char_autolink_url(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size); static size_t char_autolink_email(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size); static size_t char_autolink_www(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size); static size_t char_link(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size); +static size_t char_superscript(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size); enum markdown_char_t { MD_CHAR_NONE = 0, MD_CHAR_EMPHASIS, MD_CHAR_CODESPAN, @@ -70,11 +71,12 @@ MD_CHAR_LANGLE, MD_CHAR_ESCAPE, MD_CHAR_ENTITITY, MD_CHAR_AUTOLINK_URL, MD_CHAR_AUTOLINK_EMAIL, - MD_CHAR_AUTOLINK_WWW + MD_CHAR_AUTOLINK_WWW, + MD_CHAR_SUPERSCRIPT, }; static char_trigger markdown_char_ptrs[] = { NULL, &char_emphasis, @@ -85,10 +87,11 @@ &char_escape, &char_entity, &char_autolink_url, &char_autolink_email, &char_autolink_www, + &char_superscript, }; /* render • structure containing one particular render */ struct render { struct mkd_renderer make; @@ -143,17 +146,18 @@ { "h4", 2 }, { "h5", 2 }, { "h6", 2 }, { "ol", 2 }, { "ul", 2 }, -/*10*/ { "del", 3 }, + { "del", 3 }, /* 10 */ { "div", 3 }, -/*12*/ { "ins", 3 }, + { "ins", 3 }, /* 12 */ { "pre", 3 }, { "form", 4 }, { "math", 4 }, { "table", 5 }, + { "figure", 6 }, { "iframe", 6 }, { "script", 6 }, { "fieldset", 8 }, { "noscript", 8 }, { "blockquote", 10 } @@ -222,11 +226,11 @@ /* looking for the word end */ while (i < size && ((data[i] >= '0' && data[i] <= '9') || (data[i] >= 'A' && data[i] <= 'Z') || (data[i] >= 'a' && data[i] <= 'z'))) - i += 1; + i++; if (i >= size) return 0; /* binary search of the tag */ key.text = data; key.size = i; @@ -315,21 +319,21 @@ while (i < size) { if (data[i] == '\\') i += 2; else if (data[i] == '>' || data[i] == '\'' || data[i] == '"' || isspace(data[i])) break; - else i += 1; + else i++; } if (i >= size) return 0; if (i > j && data[i] == '>') return i + 1; /* one of the forbidden chars has been found */ *autolink = MKDA_NOT_AUTOLINK; } /* looking for sometinhg looking like a tag end */ - while (i < size && data[i] != '>') i += 1; + while (i < size && data[i] != '>') i++; if (i >= size) return 0; return i + 1; } /* parse_inline • parses inline markdown elements */ @@ -377,51 +381,76 @@ find_emph_char(char *data, size_t size, char c) { size_t i = 1; while (i < size) { - while (i < size && data[i] != c - && data[i] != '`' && data[i] != '[') - i += 1; - if (data[i] == c) return i; + while (i < size && data[i] != c && data[i] != '`' && data[i] != '[') + i++; + if (i == size) + return 0; + + if (data[i] == c) + return i; + /* not counting escaped chars */ - if (i && data[i - 1] == '\\') { i += 1; continue; } + if (i && data[i - 1] == '\\') { + i++; continue; + } /* skipping a code span */ if (data[i] == '`') { size_t tmp_i = 0; - i += 1; + + i++; while (i < size && data[i] != '`') { if (!tmp_i && data[i] == c) tmp_i = i; - i += 1; } - if (i >= size) return tmp_i; - i += 1; } + i++; + } + if (i >= size) + return tmp_i; + + i++; + } /* skipping a link */ else if (data[i] == '[') { size_t tmp_i = 0; char cc; - i += 1; + + i++; while (i < size && data[i] != ']') { if (!tmp_i && data[i] == c) tmp_i = i; - i += 1; } - i += 1; - while (i < size && (data[i] == ' ' - || data[i] == '\t' || data[i] == '\n')) - i += 1; - if (i >= size) return tmp_i; + i++; + } + + i++; + while (i < size && (data[i] == ' ' || data[i] == '\t' || data[i] == '\n')) + i++; + + if (i >= size) + return tmp_i; + if (data[i] != '[' && data[i] != '(') { /* not a link*/ if (tmp_i) return tmp_i; - else continue; } + else continue; + } + cc = data[i]; - i += 1; + i++; while (i < size && data[i] != cc) { if (!tmp_i && data[i] == c) tmp_i = i; - i += 1; } - if (i >= size) return tmp_i; - i += 1; } } + i++; + } + + if (i >= size) + return tmp_i; + + i++; + } + } + return 0; } /* parse_emph1 • parsing single emphase */ /* closed by a symbol not preceded by whitespace and not followed by symbol */ @@ -442,11 +471,11 @@ if (!len) return 0; i += len; if (i >= size) return 0; if (i + 1 < size && data[i + 1] == c) { - i += 1; + i++; continue; } if (data[i] == c && !isspace(data[i - 1])) { @@ -666,11 +695,11 @@ while (end < size && isalnum(data[end])) end++; if (end < size && data[end] == ';') - end += 1; /* real entity */ + end++; /* real entity */ else return 0; /* lone '&' */ if (rndr->make.entity) { work.data = data; @@ -717,11 +746,11 @@ if (!rndr->make.link) return 0; link = rndr_newbuf(rndr, BUFFER_SPAN); - if ((link_len = ups_autolink__www(&rewind, link, data, offset, size)) > 0) { + if ((link_len = sd_autolink__www(&rewind, link, data, offset, size)) > 0) { link_url = rndr_newbuf(rndr, BUFFER_SPAN); BUFPUTSL(link_url, "http://"); bufput(link_url, link->data, link->size); ob->size -= rewind; @@ -742,11 +771,11 @@ if (!rndr->make.autolink) return 0; link = rndr_newbuf(rndr, BUFFER_SPAN); - if ((link_len = ups_autolink__email(&rewind, link, data, offset, size)) > 0) { + if ((link_len = sd_autolink__email(&rewind, link, data, offset, size)) > 0) { ob->size -= rewind; rndr->make.autolink(ob, link, MKDA_EMAIL, rndr->make.opaque); } rndr_popbuf(rndr, BUFFER_SPAN); @@ -762,11 +791,11 @@ if (!rndr->make.autolink) return 0; link = rndr_newbuf(rndr, BUFFER_SPAN); - if ((link_len = ups_autolink__url(&rewind, link, data, offset, size)) > 0) { + if ((link_len = sd_autolink__url(&rewind, link, data, offset, size)) > 0) { ob->size -= rewind; rndr->make.autolink(ob, link, MKDA_NORMAL, rndr->make.opaque); } rndr_popbuf(rndr, BUFFER_SPAN); @@ -789,11 +818,11 @@ /* checking whether the correct renderer exists */ if ((is_img && !rndr->make.image) || (!is_img && !rndr->make.link)) goto cleanup; /* looking for the matching closing bracket */ - for (level = 1; i < size; i += 1) { + for (level = 1; i < size; i++) { if (data[i] == '\n') text_has_nl = 1; else if (data[i - 1] == '\\') continue; @@ -810,32 +839,32 @@ if (i >= size) goto cleanup; txt_e = i; - i += 1; + i++; /* skip any amount of whitespace or newline */ /* (this is much more laxist than original markdown syntax) */ while (i < size && isspace(data[i])) i++; /* inline style link */ if (i < size && data[i] == '(') { /* skipping initial whitespace */ - i += 1; + i++; while (i < size && isspace(data[i])) i++; link_b = i; /* looking for link end: ' " ) */ while (i < size) { if (data[i] == '\\') i += 2; else if (data[i] == ')' || data[i] == '\'' || data[i] == '"') break; - else i += 1; + else i++; } if (i >= size) goto cleanup; link_e = i; @@ -845,11 +874,11 @@ title_b = i; while (i < size) { if (data[i] == '\\') i += 2; else if (data[i] == ')') break; - else i += 1; + else i++; } if (i >= size) goto cleanup; /* skipping whitespaces after title */ @@ -890,11 +919,11 @@ else if (i < size && data[i] == '[') { struct buf id = { 0, 0, 0, 0, 0 }; struct link_ref *lr; /* looking for the id */ - i += 1; + i++; link_b = i; while (i < size && data[i] != ']') i++; if (i >= size) goto cleanup; link_e = i; @@ -926,11 +955,11 @@ if (!lr) goto cleanup; /* keeping link and title from link_ref */ link = lr->link; title = lr->title; - i += 1; + i++; } /* shortcut reference style link */ else { struct buf id = { 0, 0, 0, 0, 0 }; @@ -993,22 +1022,58 @@ cleanup: rndr->work_bufs[BUFFER_SPAN].size = (int)org_work_size; return ret ? i : 0; } +static size_t +char_superscript(struct buf *ob, struct render *rndr, char *data, size_t offset, size_t size) +{ + size_t sup_start, sup_len; + struct buf *sup; + if (!rndr->make.superscript) + return 0; + if (size < 2) + return 0; + + if (data[1] == '(') { + sup_start = sup_len = 2; + + while (sup_len < size && data[sup_len] != ')' && data[sup_len - 1] != '\\') + sup_len++; + + if (sup_len == size) + return 0; + } else { + sup_start = sup_len = 1; + + while (sup_len < size && !isspace(data[sup_len])) + sup_len++; + } + + if (sup_len - sup_start == 0) + return (sup_start == 2) ? 3 : 0; + + sup = rndr_newbuf(rndr, BUFFER_SPAN); + parse_inline(sup, rndr, data + sup_start, sup_len - sup_start); + rndr->make.superscript(ob, sup, rndr->make.opaque); + rndr_popbuf(rndr, BUFFER_SPAN); + + return (sup_start == 2) ? sup_len + 1 : sup_len; +} + /********************************* * BLOCK-LEVEL PARSING FUNCTIONS * *********************************/ /* is_empty • returns the line length when it is empty, 0 otherwise */ static size_t is_empty(char *data, size_t size) { size_t i; - for (i = 0; i < size && data[i] != '\n'; i += 1) + for (i = 0; i < size && data[i] != '\n'; i++) if (data[i] != ' ' && data[i] != '\t') return 0; return i + 1; } /* is_hrule • returns whether a line is a horizontal rule */ @@ -1018,26 +1083,26 @@ size_t i = 0, n = 0; char c; /* skipping initial spaces */ if (size < 3) return 0; - if (data[0] == ' ') { i += 1; - if (data[1] == ' ') { i += 1; - if (data[2] == ' ') { i += 1; } } } + if (data[0] == ' ') { i++; + if (data[1] == ' ') { i++; + if (data[2] == ' ') { i++; } } } /* looking at the hrule char */ if (i + 2 >= size || (data[i] != '*' && data[i] != '-' && data[i] != '_')) return 0; c = data[i]; /* the whole line must be the char or whitespace */ while (i < size && data[i] != '\n') { - if (data[i] == c) n += 1; + if (data[i] == c) n++; else if (data[i] != ' ' && data[i] != '\t') return 0; - i += 1; } + i++; } return n >= 3; } /* check if a line is a code fence; return its size if it is */ @@ -1047,13 +1112,13 @@ size_t i = 0, n = 0; char c; /* skipping initial spaces */ if (size < 3) return 0; - if (data[0] == ' ') { i += 1; - if (data[1] == ' ') { i += 1; - if (data[2] == ' ') { i += 1; } } } + if (data[0] == ' ') { i++; + if (data[1] == ' ') { i++; + if (data[2] == ' ') { i++; } } } /* looking at the hrule char */ if (i + 2 >= size || !(data[i] == '~' || data[i] == '`')) return 0; @@ -1140,31 +1205,31 @@ { size_t i = 0; /* test of level 1 header */ if (data[i] == '=') { - for (i = 1; i < size && data[i] == '='; i += 1); - while (i < size && (data[i] == ' ' || data[i] == '\t')) i += 1; + for (i = 1; i < size && data[i] == '='; i++); + while (i < size && (data[i] == ' ' || data[i] == '\t')) i++; return (i >= size || data[i] == '\n') ? 1 : 0; } /* test of level 2 header */ if (data[i] == '-') { - for (i = 1; i < size && data[i] == '-'; i += 1); - while (i < size && (data[i] == ' ' || data[i] == '\t')) i += 1; + for (i = 1; i < size && data[i] == '-'; i++); + while (i < size && (data[i] == ' ' || data[i] == '\t')) i++; return (i >= size || data[i] == '\n') ? 2 : 0; } return 0; } /* prefix_quote • returns blockquote prefix length */ static size_t prefix_quote(char *data, size_t size) { size_t i = 0; - if (i < size && data[i] == ' ') i += 1; - if (i < size && data[i] == ' ') i += 1; - if (i < size && data[i] == ' ') i += 1; + if (i < size && data[i] == ' ') i++; + if (i < size && data[i] == ' ') i++; + if (i < size && data[i] == ' ') i++; if (i < size && data[i] == '>') { if (i + 1 < size && (data[i + 1] == ' ' || data[i+1] == '\t')) return i + 2; else return i + 1; } else return 0; @@ -1183,28 +1248,28 @@ /* prefix_oli • returns ordered list item prefix */ static size_t prefix_oli(char *data, size_t size) { size_t i = 0; - if (i < size && data[i] == ' ') i += 1; - if (i < size && data[i] == ' ') i += 1; - if (i < size && data[i] == ' ') i += 1; + if (i < size && data[i] == ' ') i++; + if (i < size && data[i] == ' ') i++; + if (i < size && data[i] == ' ') i++; if (i >= size || data[i] < '0' || data[i] > '9') return 0; - while (i < size && data[i] >= '0' && data[i] <= '9') i += 1; + while (i < size && data[i] >= '0' && data[i] <= '9') i++; if (i + 1 >= size || data[i] != '.' || (data[i + 1] != ' ' && data[i + 1] != '\t')) return 0; return i + 2; } /* prefix_uli • returns ordered list item prefix */ static size_t prefix_uli(char *data, size_t size) { size_t i = 0; - if (i < size && data[i] == ' ') i += 1; - if (i < size && data[i] == ' ') i += 1; - if (i < size && data[i] == ' ') i += 1; + if (i < size && data[i] == ' ') i++; + if (i < size && data[i] == ' ') i++; + if (i < size && data[i] == ' ') i++; if (i + 1 >= size || (data[i] != '*' && data[i] != '+' && data[i] != '-') || (data[i + 1] != ' ' && data[i + 1] != '\t')) return 0; return i + 2; @@ -1361,11 +1426,11 @@ if (fence_end != 0) { beg += fence_end; break; } - for (end = beg + 1; end < size && data[end - 1] != '\n'; end += 1); + for (end = beg + 1; end < size && data[end - 1] != '\n'; end++); if (beg < end) { /* verbatim copy to the working buffer, escaping entities */ if (is_empty(data + beg, end - beg)) @@ -1675,14 +1740,14 @@ /* HR, which is the only self-closing block tag considered */ if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) { i = 3; while (i < size && data[i] != '>') - i += 1; + i++; if (i + 1 < size) { - i += 1; + i++; j = is_empty(data + i, size - i); if (j) { work.size = i + j; if (do_render && rndr->make.blockhtml) rndr->make.blockhtml(ob, &work, rndr->make.opaque); @@ -1989,38 +2054,38 @@ if (data[beg + 3] == ' ') return 0; } } } i += beg; /* id part: anything but a newline between brackets */ if (data[i] != '[') return 0; - i += 1; + i++; id_offset = i; while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']') - i += 1; + i++; if (i >= end || data[i] != ']') return 0; id_end = i; /* spacer: colon (space | tab)* newline? (space | tab)* */ - i += 1; + i++; if (i >= end || data[i] != ':') return 0; - i += 1; - while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; + i++; + while (i < end && (data[i] == ' ' || data[i] == '\t')) i++; if (i < end && (data[i] == '\n' || data[i] == '\r')) { - i += 1; - if (i < end && data[i] == '\r' && data[i - 1] == '\n') i += 1; } - while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; + i++; + if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; } + while (i < end && (data[i] == ' ' || data[i] == '\t')) i++; if (i >= end) return 0; /* link: whitespace-free sequence, optionally between angle brackets */ - if (data[i] == '<') i += 1; + if (data[i] == '<') i++; link_offset = i; while (i < end && data[i] != ' ' && data[i] != '\t' - && data[i] != '\n' && data[i] != '\r') i += 1; + && data[i] != '\n' && data[i] != '\r') i++; if (data[i - 1] == '>') link_end = i - 1; else link_end = i; /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */ - while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; + while (i < end && (data[i] == ' ' || data[i] == '\t')) i++; if (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != '\'' && data[i] != '"' && data[i] != '(') return 0; line_end = 0; /* computing end-of-line */ @@ -2029,21 +2094,21 @@ line_end = i + 1; /* optional (space|tab)* spacer after a newline */ if (line_end) { i = line_end + 1; - while (i < end && (data[i] == ' ' || data[i] == '\t')) i += 1; } + while (i < end && (data[i] == ' ' || data[i] == '\t')) i++; } /* optional title: any non-newline sequence enclosed in '"() alone on its line */ title_offset = title_end = 0; if (i + 1 < end && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) { - i += 1; + i++; title_offset = i; /* looking for EOL */ - while (i < end && data[i] != '\n' && data[i] != '\r') i += 1; + while (i < end && data[i] != '\n' && data[i] != '\r') i++; if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') title_end = i + 1; else title_end = i; /* stepping back */ i -= 1; @@ -2100,11 +2165,11 @@ * EXPORTED FUNCTIONS * **********************/ /* markdown • parses the input buffer and renders it into the output buffer */ void -ups_markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndrer, unsigned int extensions) { +sd_markdown(struct buf *ob, struct buf *ib, const struct mkd_renderer *rndrer, unsigned int extensions) { struct link_ref *lr; struct buf *text; size_t i, beg, end; struct render rndr; @@ -2122,11 +2187,11 @@ memcpy(&rndr.make, rndrer, sizeof(struct mkd_renderer)); arr_init(&rndr.refs, sizeof (struct link_ref)); parr_init(&rndr.work_bufs[BUFFER_BLOCK]); parr_init(&rndr.work_bufs[BUFFER_SPAN]); - for (i = 0; i < 256; i += 1) + for (i = 0; i < 256; i++) rndr.active_char[i] = 0; if (rndr.make.emphasis || rndr.make.double_emphasis || rndr.make.triple_emphasis) { rndr.active_char['*'] = MD_CHAR_EMPHASIS; rndr.active_char['_'] = MD_CHAR_EMPHASIS; @@ -2151,10 +2216,13 @@ rndr.active_char[':'] = MD_CHAR_AUTOLINK_URL; rndr.active_char['@'] = MD_CHAR_AUTOLINK_EMAIL; rndr.active_char['w'] = MD_CHAR_AUTOLINK_WWW; } + if (extensions & MKDEXT_SUPERSCRIPT) + rndr.active_char['^'] = MD_CHAR_SUPERSCRIPT; + /* Extension data */ rndr.ext_flags = extensions; rndr.max_nesting = 16; /* first pass: looking for references, copying everything else */ @@ -2163,21 +2231,21 @@ if (is_ref(ib->data, beg, ib->size, &end, &rndr.refs)) beg = end; else { /* skipping to the next line */ end = beg; while (end < ib->size && ib->data[end] != '\n' && ib->data[end] != '\r') - end += 1; + end++; /* adding the line body if present */ if (end > beg) expand_tabs(text, ib->data + beg, end - beg); while (end < ib->size && (ib->data[end] == '\n' || ib->data[end] == '\r')) { /* add one \n per newline */ if (ib->data[end] == '\n' || (end + 1 < ib->size && ib->data[end + 1] != '\n')) bufputc(text, '\n'); - end += 1; + end++; } beg = end; } @@ -2201,11 +2269,11 @@ rndr.make.doc_footer(ob, rndr.make.opaque); /* clean-up */ bufrelease(text); lr = rndr.refs.base; - for (i = 0; i < (size_t)rndr.refs.size; i += 1) { + for (i = 0; i < (size_t)rndr.refs.size; i++) { bufrelease(lr[i].id); bufrelease(lr[i].link); bufrelease(lr[i].title); } @@ -2223,10 +2291,10 @@ parr_free(&rndr.work_bufs[BUFFER_SPAN]); parr_free(&rndr.work_bufs[BUFFER_BLOCK]); } void -ups_version(int *ver_major, int *ver_minor, int *ver_revision) +sd_version(int *ver_major, int *ver_minor, int *ver_revision) { *ver_major = UPSKIRT_VER_MAJOR; *ver_minor = UPSKIRT_VER_MINOR; *ver_revision = UPSKIRT_VER_REVISION; }