blocks.c in commonmarker-0.20.0

- old
+ new

@@ -34,10 +34,14 @@
 
 static bool S_last_line_blank(const cmark_node *node) {
   return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0;
 }
 
+static bool S_last_line_checked(const cmark_node *node) {
+  return (node->flags & CMARK_NODE__LAST_LINE_CHECKED) != 0;
+}
+
 static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) {
   return (cmark_node_type)node->type;
 }
 
 static void S_set_last_line_blank(cmark_node *node, bool is_blank) {
@@ -45,10 +49,14 @@
     node->flags |= CMARK_NODE__LAST_LINE_BLANK;
   else
     node->flags &= ~CMARK_NODE__LAST_LINE_BLANK;
 }
 
+static void S_set_last_line_checked(cmark_node *node) {
+  node->flags |= CMARK_NODE__LAST_LINE_CHECKED;
+}
+
 static CMARK_INLINE bool S_is_line_end_char(char c) {
   return (c == '\n' || c == '\r');
 }
 
 static CMARK_INLINE bool S_is_space_or_tab(char c) {
@@ -119,12 +127,10 @@
 
   parser->refmap = cmark_reference_map_new(parser->mem);
   parser->root = document;
   parser->current = document;
 
-  parser->last_buffer_ended_with_cr = false;
-
   parser->syntax_extensions = saved_exts;
   parser->inline_syntax_extensions = saved_inline_exts;
   parser->options = saved_options;
 }
 
@@ -232,30 +238,47 @@
   }
 }
 
 // Check to see if a node ends with a blank line, descending
 // if needed into lists and sublists.
-static bool ends_with_blank_line(cmark_node *node) {
-  cmark_node *cur = node;
-  while (cur != NULL) {
-    if (S_last_line_blank(cur)) {
-      return true;
-    }
-    if (S_type(cur) == CMARK_NODE_LIST || S_type(cur) == CMARK_NODE_ITEM) {
-      cur = cur->last_child;
-    } else {
-      cur = NULL;
-    }
+static bool S_ends_with_blank_line(cmark_node *node) {
+  if (S_last_line_checked(node)) {
+    return(S_last_line_blank(node));
+  } else if ((S_type(node) == CMARK_NODE_LIST ||
+              S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
+    S_set_last_line_checked(node);
+    return(S_ends_with_blank_line(node->last_child));
+  } else {
+    S_set_last_line_checked(node);
+    return (S_last_line_blank(node));
   }
-  return false;
 }
 
+// returns true if content remains after link defs are resolved.
+static bool resolve_reference_link_definitions(
+		cmark_parser *parser,
+                cmark_node *b) {
+  bufsize_t pos;
+  cmark_strbuf *node_content = &b->content;
+  cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
+  while (chunk.len && chunk.data[0] == '[' &&
+         (pos = cmark_parse_reference_inline(parser->mem, &chunk,
+					     parser->refmap))) {
+
+    chunk.data += pos;
+    chunk.len -= pos;
+  }
+  cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
+  return !is_blank(&b->content, 0);
+}
+
 static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
   bufsize_t pos;
   cmark_node *item;
   cmark_node *subitem;
   cmark_node *parent;
+  bool has_content;
 
   parent = b->parent;
   assert(b->flags &
          CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks
   b->flags &= ~CMARK_NODE__OPEN;
@@ -281,19 +304,12 @@
   cmark_strbuf *node_content = &b->content;
 
   switch (S_type(b)) {
   case CMARK_NODE_PARAGRAPH:
   {
-    cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
-    while (chunk.len && chunk.data[0] == '[' &&
-           (pos = cmark_parse_reference_inline(parser->mem, &chunk, parser->refmap))) {
-
-      chunk.data += pos;
-      chunk.len -= pos;
-    }
-    cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
-    if (is_blank(node_content, 0)) {
+    has_content = resolve_reference_link_definitions(parser, b);
+    if (!has_content) {
       // remove blank node (former reference def)
       cmark_node_free(b);
     }
     break;
   }
@@ -341,11 +357,12 @@
       }
       // recurse into children of list item, to see if there are
       // spaces between them:
       subitem = item->first_child;
       while (subitem) {
-        if (ends_with_blank_line(subitem) && (item->next || subitem->next)) {
+        if ((item->next || subitem->next) &&
+            S_ends_with_blank_line(subitem)) {
           b->as.list.tight = false;
           break;
         }
         subitem = subitem->next;
       }
@@ -746,10 +763,44 @@
     ch->len = n;
     cmark_chunk_rtrim(ch);
   }
 }
 
+// Check for thematic break.  On failure, return 0 and update
+// thematic_break_kill_pos with the index at which the
+// parse fails.  On success, return length of match.
+// "...three or more hyphens, asterisks,
+// or underscores on a line by themselves. If you wish, you may use
+// spaces between the hyphens or asterisks."
+static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input,
+                                 bufsize_t offset) {
+  bufsize_t i;
+  char c;
+  char nextc = '\0';
+  int count;
+  i = offset;
+  c = peek_at(input, i);
+  if (!(c == '*' || c == '_' || c == '-')) {
+    parser->thematic_break_kill_pos = i;
+    return 0;
+  }
+  count = 1;
+  while ((nextc = peek_at(input, ++i))) {
+    if (nextc == c) {
+      count++;
+    } else if (nextc != ' ' && nextc != '\t') {
+      break;
+    }
+  }
+  if (count >= 3 && (nextc == '\r' || nextc == '\n')) {
+    return (i - offset) + 1;
+  } else {
+    parser->thematic_break_kill_pos = i;
+    return 0;
+  }
+}
+
 // Find first nonspace character from current offset, setting
 // parser->first_nonspace, parser->first_nonspace_column,
 // parser->indent, and parser->blank. Does not advance parser->offset.
 static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) {
   char c;
@@ -1038,10 +1089,11 @@
   bool maybe_lazy = S_type(parser->current) == CMARK_NODE_PARAGRAPH;
   cmark_node_type cont_type = S_type(*container);
   bufsize_t matched = 0;
   int lev = 0;
   bool save_partially_consumed_tab;
+  bool has_content;
   int save_offset;
   int save_column;
 
   while (cont_type != CMARK_NODE_CODE_BLOCK &&
          cont_type != CMARK_NODE_HTML_BLOCK) {
@@ -1110,17 +1162,24 @@
       // note, we don't adjust parser->offset because the tag is part of the
       // text
     } else if (!indented && cont_type == CMARK_NODE_PARAGRAPH &&
                (lev =
                     scan_setext_heading_line(input, parser->first_nonspace))) {
-      (*container)->type = (uint16_t)CMARK_NODE_HEADING;
-      (*container)->as.heading.level = lev;
-      (*container)->as.heading.setext = true;
-      S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
+      // finalize paragraph, resolving reference links
+      has_content = resolve_reference_link_definitions(parser, *container);
+
+      if (has_content) {
+
+        (*container)->type = (uint16_t)CMARK_NODE_HEADING;
+        (*container)->as.heading.level = lev;
+        (*container)->as.heading.setext = true;
+        S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
+      }
     } else if (!indented &&
                !(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
-               (matched = scan_thematic_break(input, parser->first_nonspace))) {
+	       (parser->thematic_break_kill_pos <= parser->first_nonspace) &&
+               (matched = S_scan_thematic_break(parser, input, parser->first_nonspace))) {
       // it's only now that we know the line is not part of a setext heading:
       *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
                              parser->first_nonspace + 1);
       S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
     } else if (!indented &&
@@ -1375,9 +1434,10 @@
 
   parser->offset = 0;
   parser->column = 0;
   parser->first_nonspace = 0;
   parser->first_nonspace_column = 0;
+  parser->thematic_break_kill_pos = 0;
   parser->indent = 0;
   parser->blank = false;
   parser->partially_consumed_tab = false;
 
   input.data = parser->curline.ptr;