ext/markdown.c in rdiscount-2.0.7.3 vs ext/markdown.c in rdiscount-2.1.6
- old
+ new
@@ -20,10 +20,13 @@
typedef int (*stfu)(const void*,const void*);
typedef ANCHOR(Paragraph) ParagraphRoot;
+static Paragraph *Pp(ParagraphRoot *, Line *, int);
+static Paragraph *compile(Line *, int, MMIOT *);
+
/* case insensitive string sort for Footnote tags.
*/
int
__mkd_footsort(Footnote *a, Footnote *b)
{
@@ -75,11 +78,11 @@
{
return nextnonblank(p,0);
}
-static int
+static inline int
blankline(Line *p)
{
return ! (p && (S(p->text) > p->dle) );
}
@@ -171,11 +174,73 @@
SUFFIX(tmp->text, T(t->text)+cutpoint, S(t->text)-cutpoint);
S(t->text) = cutpoint;
}
}
+#define UNCHECK(l) ((l)->flags &= ~CHECKED)
+/* Classify a line once and cache the verdict: sets CHECKED in l->flags,
+ * l->kind, and l->count (non-space characters scanned from l->dle on).
+ *   chk_code   indented 4+ columns;   chk_text   anything not listed here
+ *   chk_hr     only '*' or only '_', or only '-' with embedded spaces
+ *   chk_dash / chk_equal        only '-' / only '=', with no spaces
+ *   chk_tilde / chk_backtick    same for '~' / '`' (WITH_FENCED_CODE builds)
+ */
+static void
+checkline(Line *l)
+{
+    int eol, i;
+    int dashes = 0, spaces = 0,
+        equals = 0, underscores = 0,
+        stars = 0, tildes = 0,
+        backticks = 0;
+
+    l->flags |= CHECKED;
+    l->kind = chk_text;
+    l->count = 0;
+
+    if (l->dle >= 4) { l->kind=chk_code; return; }
+
+    for ( eol = S(l->text); eol > l->dle && isspace((unsigned char)T(l->text)[eol-1]); --eol )
+        ;   /* trim trailing blanks; cast keeps isspace() defined for bytes >= 0x80 */
+
+    for (i=l->dle; i<eol; i++) {
+        int c = T(l->text)[i];
+
+        if ( c != ' ' ) l->count++;
+        switch (c) {
+        case '-':  dashes = 1; break;
+        case ' ':  spaces = 1; break;
+        case '=':  equals = 1; break;
+        case '_':  underscores = 1; break;
+        case '*':  stars = 1; break;
+#if WITH_FENCED_CODE
+        case '~':  tildes = 1; break;
+        case '`':  backticks = 1; break;
+#endif
+        default:   return;      /* any other character: plain text */
+        }
+    }
+
+    if ( dashes + equals + underscores + stars + tildes + backticks > 1 )
+        return;     /* mixed rule characters are plain text */
+
+    if ( spaces ) {
+        if ( (underscores || stars || dashes) )
+            l->kind = chk_hr;
+        return;
+    }
+
+    if ( stars || underscores ) { l->kind = chk_hr; }
+    else if ( dashes ) { l->kind = chk_dash; }
+    else if ( equals ) { l->kind = chk_equal; }
+#if WITH_FENCED_CODE
+    else if ( tildes ) { l->kind = chk_tilde; }
+    else if ( backticks ) { l->kind = chk_backtick; }
+#endif
+}
+
static Line *
commentblock(Paragraph *p, int *unclosed)
{
Line *t, *ret;
char *end;
@@ -259,41 +324,10 @@
*unclosed = 1;
return 0;
}
-/* tables look like
- * header|header{|header}
- * ------|------{|......}
- * {body lines}
- */
-static int
-istable(Line *t)
-{
- char *p;
- Line *dashes = t->next;
- int contains = 0; /* found character bits; 0x01 is |, 0x02 is - */
-
- /* two lines, first must contain | */
- if ( !(dashes && memchr(T(t->text), '|', S(t->text))) )
- return 0;
-
- /* second line must contain - or | and nothing
- * else except for whitespace or :
- */
- for ( p = T(dashes->text)+S(dashes->text)-1; p >= T(dashes->text); --p)
- if ( *p == '|' )
- contains |= 0x01;
- else if ( *p == '-' )
- contains |= 0x02;
- else if ( ! ((*p == ':') || isspace(*p)) )
- return 0;
-
- return (contains & 0x03);
-}
-
-
/* footnotes look like ^<whitespace>{0,3}[stuff]: <content>$
*/
static int
isfootnote(Line *t)
{
@@ -310,80 +344,50 @@
}
return 0;
}
-static int
+static inline int
isquote(Line *t)
{
- int j;
-
- for ( j=0; j < 4; j++ )
- if ( T(t->text)[j] == '>' )
- return 1;
- else if ( !isspace(T(t->text)[j]) )
- return 0;
- return 0;
+ return (t->dle < 4 && T(t->text)[t->dle] == '>');
}
-static int
-dashchar(char c)
-{
- return (c == '*') || (c == '-') || (c == '_');
-}
-
-
-static int
+static inline int
iscode(Line *t)
{
return (t->dle >= 4);
}
-static int
+static inline int
ishr(Line *t)
{
- int i, count=0;
- char dash = 0;
- char c;
+ if ( ! (t->flags & CHECKED) )
+ checkline(t);
- if ( iscode(t) ) return 0;
-
- for ( i = 0; i < S(t->text); i++) {
- c = T(t->text)[i];
- if ( (dash == 0) && dashchar(c) )
- dash = c;
-
- if ( c == dash ) ++count;
- else if ( !isspace(c) )
- return 0;
- }
- return (count >= 3);
+ if ( t->count > 2 )
+ return t->kind == chk_hr || t->kind == chk_dash || t->kind == chk_equal;
+ return 0;
}
static int
issetext(Line *t, int *htyp)
{
- int i;
- /* then check for setext-style HEADER
- * ======
+ Line *n;
+
+ /* check for setext-style HEADER
+ * ======
*/
- if ( t->next ) {
- char *q = T(t->next->text);
- int last = S(t->next->text);
+ if ( (n = t->next) ) {
+ if ( !(n->flags & CHECKED) )
+ checkline(n);
- if ( (*q == '=') || (*q == '-') ) {
- /* ignore trailing whitespace */
- while ( (last > 1) && isspace(q[last-1]) )
- --last;
-
- for (i=1; i < last; i++)
- if ( q[0] != q[i] )
- return 0;
+ if ( n->kind == chk_dash || n->kind == chk_equal ) {
*htyp = SETEXT;
return 1;
}
}
return 0;
@@ -391,32 +395,35 @@
static int
ishdr(Line *t, int *htyp)
{
- int i;
-
-
- /* first check for etx-style ###HEADER###
- */
-
- /* leading run of `#`'s ?
- */
- for ( i=0; T(t->text)[i] == '#'; ++i)
- ;
-
/* ANY leading `#`'s make this into an ETX header
*/
- if ( i && (i < S(t->text) || i > 1) ) {
+ if ( (t->dle == 0) && (S(t->text) > 1) && (T(t->text)[0] == '#') ) {
*htyp = ETX;
return 1;
}
+ /* And if not, maybe it's a SETEXT header instead
+ */
return issetext(t, htyp);
}
+/* True when no block can continue through t: t is null, blank, a rule or a
+ * header.  Null counts as the end (as blankline() does) so that islist() and
+ * endoftextblock() never go on to dereference a null line. */
+static inline int
+end_of_block(Line *t)
+{
+    int dummy;
+    if ( !t )
+        return 1;
+    return ( (S(t->text) <= t->dle) || ishr(t) || ishdr(t, &dummy) );
+}
+
static Line*
is_discount_dt(Line *t, int *clip)
{
#if USE_DISCOUNT_DL
if ( t && t->next
@@ -446,17 +453,16 @@
static Line*
is_extra_dt(Line *t, int *clip)
{
#if USE_EXTRA_DL
- int i;
- if ( t && t->next && T(t->text)[0] != '='
+ if ( t && t->next && S(t->text) && T(t->text)[0] != '='
&& T(t->text)[S(t->text)-1] != '=') {
Line *x;
- if ( iscode(t) || blankline(t) || ishdr(t,&i) || ishr(t) )
+ if ( iscode(t) || end_of_block(t) )
return 0;
if ( (x = skipempty(t->next)) && is_extra_dd(x) ) {
*clip = x->dle+2;
return t;
@@ -488,11 +494,11 @@
islist(Line *t, int *clip, DWORD flags, int *list_type)
{
int i, j;
char *q;
- if ( /*iscode(t) ||*/ blankline(t) || ishdr(t,&i) || ishr(t) )
+ if ( end_of_block(t) )
return 0;
if ( !(flags & (MKD_NODLIST|MKD_STRICT)) && isdefinition(t,clip,list_type) )
return DL;
@@ -516,10 +522,11 @@
}
strtoul(T(t->text)+t->dle, &q, 10);
if ( (q > T(t->text)+t->dle) && (q == T(t->text) + (j-1)) ) {
j = nextnonblank(t,j);
+ /* *clip = j; */
*clip = (j > 4) ? 4 : j;
*list_type = OL;
return AL;
}
}
@@ -559,10 +566,11 @@
while ( (i < S(p->text)) && isspace(T(p->text)[i]) )
++i;
CLIP(p->text, 0, i);
+ UNCHECK(p);
for (j=S(p->text); (j > 1) && (T(p->text)[j-1] == '#'); --j)
;
while ( j && isspace(T(p->text)[j-1]) )
@@ -595,11 +603,54 @@
}
return t;
}
+#if WITH_FENCED_CODE
+/* Is r a fence line of at least `size' fence characters?  kind == 0 accepts
+ * either '~' or '`' fences; otherwise r->kind must equal kind. */
static int
+iscodefence(Line *r, int size, line_type kind)
+{
+    if ( !(r->flags & CHECKED) )
+        checkline(r);
+    if ( kind )
+        return (r->kind == kind) && (r->count >= size);
+    else
+        return (r->kind == chk_tilde || r->kind == chk_backtick) && (r->count >= size);
+}
+
+/* Build a CODE paragraph (via Pp on d) from the fenced block opening at *ptr.
+ * On success both fence lines are freed, *ptr is moved past the closing fence
+ * and the paragraph is returned.  Returns 0, leaving the line list intact, if
+ * the block is empty or never closed.  Expects checkline() already ran on *ptr. */
+static Paragraph *
+fencedcodeblock(ParagraphRoot *d, Line **ptr)
+{
+    Line *first = (*ptr), *r;
+    Paragraph *ret;
+
+    /* don't allow zero-length code fences */
+    if ( (first->next == 0) || iscodefence(first->next, first->count, 0) )
+        return 0;
+
+    /* find the closing fence, discard the fences,
+     * return a Paragraph with the contents */
+    for ( r = first; r && r->next; r = r->next )
+        if ( iscodefence(r->next, first->count, first->kind) ) {
+            (*ptr) = r->next->next;
+            ret = Pp(d, first->next, CODE);
+            ___mkd_freeLine(first);
+            ___mkd_freeLine(r->next);
+            r->next = 0;
+            return ret;
+        }
+    return 0;
+}
+#endif
+
+static int
centered(Line *first, Line *last)
{
if ( first&&last ) {
int len = S(last->text);
@@ -618,23 +669,22 @@
static int
endoftextblock(Line *t, int toplevelblock, DWORD flags)
{
int z;
- if ( blankline(t)||isquote(t)||ishdr(t,&z)||ishr(t) )
+ if ( end_of_block(t) || isquote(t) )
return 1;
- /* HORRIBLE STANDARDS KLUDGE: non-toplevel paragraphs absorb adjacent
- * code blocks
+ /* HORRIBLE STANDARDS KLUDGES:
+ * 1. non-toplevel paragraphs absorb adjacent code blocks
+ * 2. Toplevel paragraphs absorb adjacent list items,
+ * but sublevel blocks behave properly.
+ * (What this means is that we only need to check for code
+ * blocks at toplevel, and only check for list items at
+ * nested levels.)
*/
- if ( toplevelblock && iscode(t) )
- return 1;
-
- /* HORRIBLE STANDARDS KLUDGE: Toplevel paragraphs eat absorb adjacent
- * list items, but sublevel blocks behave properly.
- */
- return toplevelblock ? 0 : islist(t,&z,flags, &z);
+ return toplevelblock ? 0 : islist(t,&z,flags,&z);
}
static Line *
textblock(Paragraph *p, int toplevel, DWORD flags)
@@ -678,10 +728,11 @@
int last, i;
if ( flags & (MKD_NODIVQUOTE|MKD_STRICT) )
return 0;
+ start = nextnonblank(p, start);
last= S(p->text) - (1 + start);
s = T(p->text) + start;
if ( (last <= 0) || (*s != '%') || (s[last] != '%') )
return 0;
@@ -701,11 +752,11 @@
/*
* accumulate a blockquote.
*
* one sick horrible thing about blockquotes is that even though
* it just takes ^> to start a quote, following lines, if quoted,
- * assume that the prefix is ``>''. This means that code needs
+ * assume that the prefix is ``> ''. This means that code needs
* to be indented *5* spaces from the leading '>', but *4* spaces
* from the start of the line. This does not appear to be
* documented in the reference implementation, but it's the
* way the markdown sample web form at Daring Fireball works.
*/
@@ -726,10 +777,11 @@
qp++;
/* clip next space, if any */
if ( T(t->text)[qp] == ' ' )
qp++;
CLIP(t->text, 0, qp);
+ UNCHECK(t);
t->dle = mkd_firstnonblank(t);
}
q = skipempty(t->next);
@@ -758,32 +810,10 @@
}
return t;
}
-/*
- * A table block starts with a table header (see istable()), and continues
- * until EOF or a line that /doesn't/ contain a |.
- */
-static Line *
-tableblock(Paragraph *p)
-{
- Line *t, *q;
-
- for ( t = p->text; t && (q = t->next); t = t->next ) {
- if ( !memchr(T(q->text), '|', S(q->text)) ) {
- t->next = 0;
- return q;
- }
- }
- return 0;
-}
-
-
-static Paragraph *Pp(ParagraphRoot *, Line *, int);
-static Paragraph *compile(Line *, int, MMIOT *);
-
typedef int (*linefn)(Line *);
/*
* pull in a list block. A list block starts with a list marker and
@@ -798,10 +828,11 @@
int clip = indent;
int z;
for ( t = p->text; t ; t = q) {
CLIP(t->text, 0, clip);
+ UNCHECK(t);
t->dle = mkd_firstnonblank(t);
if ( (q = skipempty(t->next)) == 0 ) {
___mkd_freeLineRange(t,q);
return 0;
@@ -851,17 +882,18 @@
break;
if ( (text = skipempty(q->next)) == 0 )
break;
- if (( para = (text != q->next) ))
+ if ( para = (text != q->next) )
___mkd_freeLineRange(q, text);
q->next = 0;
if ( kind == 1 /* discount dl */ )
for ( q = labels; q; q = q->next ) {
CLIP(q->text, 0, 1);
+ UNCHECK(q);
S(q->text)--;
}
dd_block:
p = Pp(&d, text, LISTITEM);
@@ -873,11 +905,11 @@
if ( para && p->down ) p->down->align = PARA;
if ( (q = skipempty(text)) == 0 )
break;
- if (( para = (q != text) )) {
+ if ( para = (q != text) ) {
Line anchor;
anchor.next = text;
___mkd_freeLineRange(&anchor,q);
text = q;
@@ -1063,20 +1095,26 @@
struct kw *tag;
int eaten, unclosed;
while ( ptr ) {
if ( !(f->flags & MKD_NOHTML) && (tag = isopentag(ptr)) ) {
+ int blocktype;
/* If we encounter a html/style block, compile and save all
* of the cached source BEFORE processing the html/style.
*/
if ( T(source) ) {
E(source)->next = 0;
p = Pp(&d, 0, SOURCE);
p->down = compile(T(source), 1, f);
T(source) = E(source) = 0;
}
- p = Pp(&d, ptr, strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML);
+
+ if ( f->flags & MKD_NOSTYLE )
+ blocktype = HTML;
+ else
+ blocktype = strcmp(tag->id, "STYLE") == 0 ? STYLE : HTML;
+ p = Pp(&d, ptr, blocktype);
ptr = htmlblock(p, tag, &unclosed);
if ( unclosed ) {
p->typ = SOURCE;
p->down = compile(p->text, 1, f);
p->text = 0;
@@ -1107,10 +1145,62 @@
}
return T(d);
}
+/* Returns the smaller of j->dle and dle. */
+static int
+first_nonblank_before(Line *j, int dle)
+{
+    return (dle > j->dle) ? j->dle : dle;
+}
+
+/* Decide whether the paragraph whose first line is pp should be a TABLE.
+ * Returns 1 only if tables are enabled in f->flags, there are at least three
+ * lines, every line has PIPECHAR set, and the second line holds nothing but
+ * whitespace, '-', ':' and '|'; a header with a leading '|' demands one on
+ * every line. */
+static int
+actually_a_table(MMIOT *f, Line *pp)
+{
+    Line *r;
+    int j, c;
+
+    /* tables need to be turned on */
+    if ( f->flags & (MKD_STRICT|MKD_NOTABLES) )
+        return 0;
+
+    /* tables need three lines */
+    if ( !(pp && pp->next && pp->next->next) ) {
+        return 0;
+    }
+
+    /* all lines must contain |'s */
+    for (r = pp; r; r = r->next )
+        if ( !(r->flags & PIPECHAR) ) {
+            return 0;
+        }
+
+    /* if the header has a leading |, all lines must have leading |'s */
+    if ( T(pp->text)[pp->dle] == '|' ) {
+        for ( r = pp; r; r = r->next )
+            if ( T(r->text)[first_nonblank_before(r,pp->dle)] != '|' ) {
+                return 0;
+            }
+    }
+
+    /* second line must be only whitespace, -, |, or : */
+    r = pp->next;
+    for ( j=r->dle; j < S(r->text); ++j ) {
+        c = (unsigned char)T(r->text)[j];   /* isspace() needs a non-negative value */
+        if ( !(isspace(c)||(c=='-')||(c==':')||(c=='|')) ) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
/*
* break a collection of markdown input into
* blocks of lists, code, html, and text to
* be marked up.
*/
@@ -1137,17 +1227,21 @@
___mkd_tidy(&p->text->text);
}
ptr = codeblock(p);
}
+#if WITH_FENCED_CODE
+ else if ( iscodefence(ptr,3,0) && (p=fencedcodeblock(&d, &ptr)) )
+ /* yay, it's already done */ ;
+#endif
else if ( ishr(ptr) ) {
p = Pp(&d, 0, HR);
r = ptr;
ptr = ptr->next;
___mkd_freeLine(r);
}
- else if (( list_class = islist(ptr, &indent, f->flags, &list_type) )) {
+ else if ( list_class = islist(ptr, &indent, f->flags, &list_type) ) {
if ( list_class == DL ) {
p = Pp(&d, ptr, DL);
ptr = definition_block(p, indent, f, list_type);
}
else {
@@ -1163,16 +1257,15 @@
}
else if ( ishdr(ptr, &hdr_type) ) {
p = Pp(&d, ptr, HDR);
ptr = headerblock(p, hdr_type);
}
- else if ( istable(ptr) && !(f->flags & (MKD_STRICT|MKD_NOTABLES)) ) {
- p = Pp(&d, ptr, TABLE);
- ptr = tableblock(p);
- }
else {
p = Pp(&d, ptr, MARKUP);
ptr = textblock(p, toplevel, f->flags);
+ /* tables are a special kind of paragraph */
+ if ( actually_a_table(f, p->text) )
+ p->typ = TABLE;
}
if ( (para||toplevel) && !p->align )
p->align = PARA;