ext/markdown.c in rdiscount-2.0.7.3 vs ext/markdown.c in rdiscount-2.1.6

- old
+ new

@@ -20,10 +20,13 @@ typedef int (*stfu)(const void*,const void*); typedef ANCHOR(Paragraph) ParagraphRoot; +static Paragraph *Pp(ParagraphRoot *, Line *, int); +static Paragraph *compile(Line *, int, MMIOT *); + /* case insensitive string sort for Footnote tags. */ int __mkd_footsort(Footnote *a, Footnote *b) { @@ -75,11 +78,11 @@ { return nextnonblank(p,0); } -static int +static inline int blankline(Line *p) { return ! (p && (S(p->text) > p->dle) ); } @@ -171,11 +174,73 @@ SUFFIX(tmp->text, T(t->text)+cutpoint, S(t->text)-cutpoint); S(t->text) = cutpoint; } } +#define UNCHECK(l) ((l)->flags &= ~CHECKED) +/* + * walk a line, seeing if it's any of half a dozen interesting regular + * types. + */ +static void +checkline(Line *l) +{ + int eol, i; + int dashes = 0, spaces = 0, + equals = 0, underscores = 0, + stars = 0, tildes = 0, + backticks = 0; + + l->flags |= CHECKED; + l->kind = chk_text; + l->count = 0; + + if (l->dle >= 4) { l->kind=chk_code; return; } + + for ( eol = S(l->text); eol > l->dle && isspace(T(l->text)[eol-1]); --eol ) + ; + + for (i=l->dle; i<eol; i++) { + register int c = T(l->text)[i]; + + if ( c != ' ' ) l->count++; + + switch (c) { + case '-': dashes = 1; break; + case ' ': spaces = 1; break; + case '=': equals = 1; break; + case '_': underscores = 1; break; + case '*': stars = 1; break; +#if WITH_FENCED_CODE + case '~': tildes = 1; break; + case '`': backticks = 1; break; +#endif + default: return; + } + } + + if ( dashes + equals + underscores + stars + tildes + backticks > 1 ) + return; + + if ( spaces ) { + if ( (underscores || stars || dashes) ) + l->kind = chk_hr; + return; + } + + if ( stars || underscores ) { l->kind = chk_hr; } + else if ( dashes ) { l->kind = chk_dash; } + else if ( equals ) { l->kind = chk_equal; } +#if WITH_FENCED_CODE + else if ( tildes ) { l->kind = chk_tilde; } + else if ( backticks ) { l->kind = chk_backtick; } +#endif +} + + + static Line * commentblock(Paragraph *p, int *unclosed) { Line *t, *ret; char *end; @@ -259,41 +324,10 @@ *unclosed = 1; return 0; } -/* tables look like - * header|header{|header} - * ------|------{|......} - * {body lines} - */ -static int -istable(Line *t) -{ - char *p; - Line *dashes = t->next; - int contains = 0; /* found character bits; 0x01 is |, 0x02 is - */ - - /* two lines, first must contain | */ - if ( !(dashes && memchr(T(t->text), '|', S(t->text))) ) - return 0; - - /* second line must contain - or | and nothing - * else except for whitespace or : - */ - for ( p = T(dashes->text)+S(dashes->text)-1; p >= T(dashes->text); --p) - if ( *p == '|' ) - contains |= 0x01; - else if ( *p == '-' ) - contains |= 0x02; - else if ( ! ((*p == ':') || isspace(*p)) ) - return 0; - - return (contains & 0x03); -} - - /* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$ */ static int isfootnote(Line *t) { @@ -310,80 +344,50 @@ } return 0; } -static int +static inline int isquote(Line *t) { - int j; - - for ( j=0; j < 4; j++ ) - if ( T(t->text)[j] == '>' ) - return 1; - else if ( !isspace(T(t->text)[j]) ) - return 0; - return 0; + return (t->dle < 4 && T(t->text)[t->dle] == '>'); } -static int -dashchar(char c) -{ - return (c == '*') || (c == '-') || (c == '_'); -} - - -static int +static inline int iscode(Line *t) { return (t->dle >= 4); } -static int +static inline int ishr(Line *t) { - int i, count=0; - char dash = 0; - char c; + if ( ! (t->flags & CHECKED) ) + checkline(t); - if ( iscode(t) ) return 0; - - for ( i = 0; i < S(t->text); i++) { - c = T(t->text)[i]; - if ( (dash == 0) && dashchar(c) ) - dash = c; - - if ( c == dash ) ++count; - else if ( !isspace(c) ) - return 0; - } - return (count >= 3); + if ( t->count > 2 ) + return t->kind == chk_hr || t->kind == chk_dash || t->kind == chk_equal; + return 0; } static int issetext(Line *t, int *htyp) { - int i; - /* then check for setext-style HEADER - * ====== + Line *n; + + /* check for setext-style HEADER + * ====== */ - if ( t->next ) { - char *q = T(t->next->text); - int last = S(t->next->text); + if ( (n = t->next) ) { + if ( !(n->flags & CHECKED) ) + checkline(n); - if ( (*q == '=') || (*q == '-') ) { - /* ignore trailing whitespace */ - while ( (last > 1) && isspace(q[last-1]) ) - --last; - - for (i=1; i < last; i++) - if ( q[0] != q[i] ) - return 0; + if ( n->kind == chk_dash || n->kind == chk_equal ) { *htyp = SETEXT; return 1; } } return 0; @@ -391,32 +395,35 @@ static int ishdr(Line *t, int *htyp) { - int i; - - - /* first check for etx-style ###HEADER### - */ - - /* leading run of `#`'s ? - */ - for ( i=0; T(t->text)[i] == '#'; ++i) - ; - /* ANY leading `#`'s make this into an ETX header */ - if ( i && (i < S(t->text) || i > 1) ) { + if ( (t->dle == 0) && (S(t->text) > 1) && (T(t->text)[0] == '#') ) { *htyp = ETX; return 1; } + /* And if not, maybe it's a SETEXT header instead + */ return issetext(t, htyp); } +static inline int +end_of_block(Line *t) +{ + int dummy; + + if ( !t ) + return 0; + + return ( (S(t->text) <= t->dle) || ishr(t) || ishdr(t, &dummy) ); +} + + static Line* is_discount_dt(Line *t, int *clip) { #if USE_DISCOUNT_DL if ( t && t->next @@ -446,17 +453,16 @@ static Line* is_extra_dt(Line *t, int *clip) { #if USE_EXTRA_DL - int i; - if ( t && t->next && T(t->text)[0] != '=' + if ( t && t->next && S(t->text) && T(t->text)[0] != '=' && T(t->text)[S(t->text)-1] != '=') { Line *x; - if ( iscode(t) || blankline(t) || ishdr(t,&i) || ishr(t) ) + if ( iscode(t) || end_of_block(t) ) return 0; if ( (x = skipempty(t->next)) && is_extra_dd(x) ) { *clip = x->dle+2; return t; @@ -488,11 +494,11 @@ islist(Line *t, int *clip, DWORD flags, int *list_type) { int i, j; char *q; - if ( /*iscode(t) ||*/ blankline(t) || ishdr(t,&i) || ishr(t) ) + if ( end_of_block(t) ) return 0; if ( !(flags & (MKD_NODLIST|MKD_STRICT)) && isdefinition(t,clip,list_type) ) return DL; @@ -516,10 +522,11 @@ } strtoul(T(t->text)+t->dle, &q, 10); if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) { j = nextnonblank(t,j); + /* *clip = j; */ *clip = (j > 4) ? 4 : j; *list_type = OL; return AL; } } @@ -559,10 +566,11 @@ while ( (i < S(p->text)) && isspace(T(p->text)[i]) ) ++i; CLIP(p->text, 0, i); + UNCHECK(p); for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j) ; while ( j && isspace(T(p->text)[j-1]) ) @@ -595,11 +603,54 @@ } return t; } +#ifdef WITH_FENCED_CODE static int +iscodefence(Line *r, int size, line_type kind) +{ + if ( !(r->flags & CHECKED) ) + checkline(r); + + if ( kind ) + return (r->kind == kind) && (r->count >= size); + else + return (r->kind == chk_tilde || r->kind == chk_backtick) && (r->count >= size); +} + +static Paragraph * +fencedcodeblock(ParagraphRoot *d, Line **ptr) +{ + Line *first, *r; + Paragraph *ret; + + first = (*ptr); + + /* don't allow zero-length code fences + */ + if ( (first->next == 0) || iscodefence(first->next, first->count, 0) ) + return 0; + + /* find the closing fence, discard the fences, + * return a Paragraph with the contents + */ + for ( r = first; r && r->next; r = r->next ) + if ( iscodefence(r->next, first->count, first->kind) ) { + (*ptr) = r->next->next; + ret = Pp(d, first->next, CODE); + ___mkd_freeLine(first); + ___mkd_freeLine(r->next); + r->next = 0; + return ret; + } + return 0; +} +#endif + + +static int centered(Line *first, Line *last) { if ( first&&last ) { int len = S(last->text); @@ -618,23 +669,22 @@ static int endoftextblock(Line *t, int toplevelblock, DWORD flags) { int z; - if ( blankline(t)||isquote(t)||ishdr(t,&z)||ishr(t) ) + if ( end_of_block(t) || isquote(t) ) return 1; - /* HORRIBLE STANDARDS KLUDGE: non-toplevel paragraphs absorb adjacent - * code blocks + /* HORRIBLE STANDARDS KLUDGES: + * 1. non-toplevel paragraphs absorb adjacent code blocks + * 2. Toplevel paragraphs eat absorb adjacent list items, + * but sublevel blocks behave properly. + * (What this means is that we only need to check for code + * blocks at toplevel, and only check for list items at + * nested levels.) */ - if ( toplevelblock && iscode(t) ) - return 1; - - /* HORRIBLE STANDARDS KLUDGE: Toplevel paragraphs eat absorb adjacent - * list items, but sublevel blocks behave properly. - */ - return toplevelblock ? 0 : islist(t,&z,flags, &z); + return toplevelblock ? 0 : islist(t,&z,flags,&z); } static Line * textblock(Paragraph *p, int toplevel, DWORD flags) @@ -678,10 +728,11 @@ int last, i; if ( flags & (MKD_NODIVQUOTE|MKD_STRICT) ) return 0; + start = nextnonblank(p, start); last= S(p->text) - (1 + start); s = T(p->text) + start; if ( (last <= 0) || (*s != '%') || (s[last] != '%') ) return 0; @@ -701,11 +752,11 @@ /* * accumulate a blockquote. * * one sick horrible thing about blockquotes is that even though * it just takes ^> to start a quote, following lines, if quoted, - * assume that the prefix is ``>''. This means that code needs + * assume that the prefix is ``> ''. This means that code needs * to be indented *5* spaces from the leading '>', but *4* spaces * from the start of the line. This does not appear to be * documented in the reference implementation, but it's the * way the markdown sample web form at Daring Fireball works. */ @@ -726,10 +777,11 @@ qp++; /* clip next space, if any */ if ( T(t->text)[qp] == ' ' ) qp++; CLIP(t->text, 0, qp); + UNCHECK(t); t->dle = mkd_firstnonblank(t); } q = skipempty(t->next); @@ -758,32 +810,10 @@ } return t; } -/* - * A table block starts with a table header (see istable()), and continues - * until EOF or a line that /doesn't/ contain a |. - */ -static Line * -tableblock(Paragraph *p) -{ - Line *t, *q; - - for ( t = p->text; t && (q = t->next); t = t->next ) { - if ( !memchr(T(q->text), '|', S(q->text)) ) { - t->next = 0; - return q; - } - } - return 0; -} - - -static Paragraph *Pp(ParagraphRoot *, Line *, int); -static Paragraph *compile(Line *, int, MMIOT *); - typedef int (*linefn)(Line *); /* * pull in a list block. A list block starts with a list marker and @@ -798,10 +828,11 @@ int clip = indent; int z; for ( t = p->text; t ; t = q) { CLIP(t->text, 0, clip); + UNCHECK(t); t->dle = mkd_firstnonblank(t); if ( (q = skipempty(t->next)) == 0 ) { ___mkd_freeLineRange(t,q); return 0; @@ -851,17 +882,18 @@ break; if ( (text = skipempty(q->next)) == 0 ) break; - if (( para = (text != q->next) )) + if ( para = (text != q->next) ) ___mkd_freeLineRange(q, text); q->next = 0; if ( kind == 1 /* discount dl */ ) for ( q = labels; q; q = q->next ) { CLIP(q->text, 0, 1); + UNCHECK(q); S(q->text)--; } dd_block: p = Pp(&d, text, LISTITEM); @@ -873,11 +905,11 @@ if ( para && p->down ) p->down->align = PARA; if ( (q = skipempty(text)) == 0 ) break; - if (( para = (q != text) )) { + if ( para = (q != text) ) { Line anchor; anchor.next = text; ___mkd_freeLineRange(&anchor,q); text = q; @@ -1063,20 +1095,26 @@ struct kw *tag; int eaten, unclosed; while ( ptr ) { if ( !(f->flags & MKD_NOHTML) && (tag = isopentag(ptr)) ) { + int blocktype; /* If we encounter a html/style block, compile and save all * of the cached source BEFORE processing the html/style. */ if ( T(source) ) { E(source)->next = 0; p = Pp(&d, 0, SOURCE); p->down = compile(T(source), 1, f); T(source) = E(source) = 0; } - p = Pp(&d, ptr, strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML); + + if ( f->flags & MKD_NOSTYLE ) + blocktype = HTML; + else + blocktype = strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML; + p = Pp(&d, ptr, blocktype); ptr = htmlblock(p, tag, &unclosed); if ( unclosed ) { p->typ = SOURCE; p->down = compile(p->text, 1, f); p->text = 0; @@ -1107,10 +1145,62 @@ } return T(d); } +static int +first_nonblank_before(Line *j, int dle) +{ + return (j->dle < dle) ? j->dle : dle; +} + + +static int +actually_a_table(MMIOT *f, Line *pp) +{ + Line *r; + int j; + int c; + + /* tables need to be turned on */ + if ( f->flags & (MKD_STRICT|MKD_NOTABLES) ) + return 0; + + /* tables need three lines */ + if ( !(pp && pp->next && pp->next->next) ) { + return 0; + } + + /* all lines must contain |'s */ + for (r = pp; r; r = r->next ) + if ( !(r->flags & PIPECHAR) ) { + return 0; + } + + /* if the header has a leading |, all lines must have leading |'s */ + if ( T(pp->text)[pp->dle] == '|' ) { + for ( r = pp; r; r = r->next ) + if ( T(r->text)[first_nonblank_before(r,pp->dle)] != '|' ) { + return 0; + } + } + + /* second line must be only whitespace, -, |, or : */ + r = pp->next; + + for ( j=r->dle; j < S(r->text); ++j ) { + c = T(r->text)[j]; + + if ( !(isspace(c)||(c=='-')||(c==':')||(c=='|')) ) { + return 0; + } + } + + return 1; +} + + /* * break a collection of markdown input into * blocks of lists, code, html, and text to * be marked up. */ @@ -1137,17 +1227,21 @@ ___mkd_tidy(&p->text->text); } ptr = codeblock(p); } +#if WITH_FENCED_CODE + else if ( iscodefence(ptr,3,0) && (p=fencedcodeblock(&d, &ptr)) ) + /* yay, it's already done */ ; +#endif else if ( ishr(ptr) ) { p = Pp(&d, 0, HR); r = ptr; ptr = ptr->next; ___mkd_freeLine(r); } - else if (( list_class = islist(ptr, &indent, f->flags, &list_type) )) { + else if ( list_class = islist(ptr, &indent, f->flags, &list_type) ) { if ( list_class == DL ) { p = Pp(&d, ptr, DL); ptr = definition_block(p, indent, f, list_type); } else { @@ -1163,16 +1257,15 @@ } else if ( ishdr(ptr, &hdr_type) ) { p = Pp(&d, ptr, HDR); ptr = headerblock(p, hdr_type); } - else if ( istable(ptr) && !(f->flags & (MKD_STRICT|MKD_NOTABLES)) ) { - p = Pp(&d, ptr, TABLE); - ptr = tableblock(p); - } else { p = Pp(&d, ptr, MARKUP); ptr = textblock(p, toplevel, f->flags); + /* tables are a special kind of paragraph */ + if ( actually_a_table(f, p->text) ) + p->typ = TABLE; } if ( (para||toplevel) && !p->align ) p->align = PARA;