ext/commonmarker/blocks.c in commonmarker-0.19.0 vs ext/commonmarker/blocks.c in commonmarker-0.20.0
- old
+ new
@@ -34,10 +34,14 @@
// Reports whether the CMARK_NODE__LAST_LINE_BLANK bit is set in node->flags.
static bool S_last_line_blank(const cmark_node *node) {
return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0;
}
+static bool S_last_line_checked(const cmark_node *node) { // reports whether the CMARK_NODE__LAST_LINE_CHECKED bit is set in node->flags
+ return (node->flags & CMARK_NODE__LAST_LINE_CHECKED) != 0;
+}
+
// Returns node->type converted to the cmark_node_type enum.
static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) {
return (cmark_node_type)node->type;
}
static void S_set_last_line_blank(cmark_node *node, bool is_blank) {
@@ -45,10 +49,14 @@
node->flags |= CMARK_NODE__LAST_LINE_BLANK;
else
node->flags &= ~CMARK_NODE__LAST_LINE_BLANK;
}
+static void S_set_last_line_checked(cmark_node *node) { // sets the CMARK_NODE__LAST_LINE_CHECKED bit in node->flags; this helper never clears it
+ node->flags |= CMARK_NODE__LAST_LINE_CHECKED;
+}
+
// True when c is a line feed ('\n') or a carriage return ('\r').
static CMARK_INLINE bool S_is_line_end_char(char c) {
return (c == '\n' || c == '\r');
}
static CMARK_INLINE bool S_is_space_or_tab(char c) {
@@ -119,12 +127,10 @@
parser->refmap = cmark_reference_map_new(parser->mem);
parser->root = document;
parser->current = document;
- parser->last_buffer_ended_with_cr = false;
-
parser->syntax_extensions = saved_exts;
parser->inline_syntax_extensions = saved_inline_exts;
parser->options = saved_options;
}
@@ -232,30 +238,47 @@
}
}
// Check to see if a node ends with a blank line, descending
// if needed into lists and sublists.
// The removed (-) version follows last_child links in a loop and returns
// true at the first node whose last-line-blank flag is set. The added (+)
// version recurses instead and marks every node it visits with
// CMARK_NODE__LAST_LINE_CHECKED, so a later call on the same node returns
// immediately.
-static bool ends_with_blank_line(cmark_node *node) {
- cmark_node *cur = node;
- while (cur != NULL) {
- if (S_last_line_blank(cur)) {
- return true;
- }
- if (S_type(cur) == CMARK_NODE_LIST || S_type(cur) == CMARK_NODE_ITEM) {
- cur = cur->last_child;
- } else {
- cur = NULL;
- }
+static bool S_ends_with_blank_line(cmark_node *node) {
+ if (S_last_line_checked(node)) { // already visited: answer from this node's own blank flag
+ return(S_last_line_blank(node)); // NOTE(review): no descent on this path, even for LIST/ITEM nodes -- confirm intended
+ } else if ((S_type(node) == CMARK_NODE_LIST ||
+ S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
+ S_set_last_line_checked(node); // mark the container before recursing into its last child
+ return(S_ends_with_blank_line(node->last_child));
+ } else {
+ S_set_last_line_checked(node); // any other node type, or a LIST/ITEM without children
+ return (S_last_line_blank(node));
}
- return false;
}
+// Consumes leading reference link definitions from b->content (each parsed by cmark_parse_reference_inline with parser->refmap) and returns true if non-blank content remains afterwards.
+static bool resolve_reference_link_definitions(
+ cmark_parser *parser,
+ cmark_node *b) {
+ bufsize_t pos;
+ cmark_strbuf *node_content = &b->content;
+ cmark_chunk chunk = {node_content->ptr, node_content->size, 0}; // chunk is initialised from the content buffer's ptr and size
+ while (chunk.len && chunk.data[0] == '[' && // only attempt a parse while the remaining text starts with '['
+ (pos = cmark_parse_reference_inline(parser->mem, &chunk,
+ parser->refmap))) { // a zero return ends the loop
+
+ chunk.data += pos; // advance the view by the pos value the parser returned
+ chunk.len -= pos;
+ }
+ cmark_strbuf_drop(node_content, (node_content->size - chunk.len)); // count passed = total amount the loop advanced
+ return !is_blank(&b->content, 0);
+}
+
static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
bufsize_t pos;
cmark_node *item;
cmark_node *subitem;
cmark_node *parent;
+ bool has_content;
parent = b->parent;
assert(b->flags &
CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks
b->flags &= ~CMARK_NODE__OPEN;
@@ -281,19 +304,12 @@
cmark_strbuf *node_content = &b->content;
switch (S_type(b)) {
case CMARK_NODE_PARAGRAPH:
{
- cmark_chunk chunk = {node_content->ptr, node_content->size, 0};
- while (chunk.len && chunk.data[0] == '[' &&
- (pos = cmark_parse_reference_inline(parser->mem, &chunk, parser->refmap))) {
-
- chunk.data += pos;
- chunk.len -= pos;
- }
- cmark_strbuf_drop(node_content, (node_content->size - chunk.len));
- if (is_blank(node_content, 0)) {
+ has_content = resolve_reference_link_definitions(parser, b);
+ if (!has_content) {
// remove blank node (former reference def)
cmark_node_free(b);
}
break;
}
@@ -341,11 +357,12 @@
}
// recurse into children of list item, to see if there are
// spaces between them:
subitem = item->first_child;
while (subitem) {
- if (ends_with_blank_line(subitem) && (item->next || subitem->next)) {
+ if ((item->next || subitem->next) &&
+ S_ends_with_blank_line(subitem)) {
b->as.list.tight = false;
break;
}
subitem = subitem->next;
}
@@ -746,10 +763,44 @@
ch->len = n;
cmark_chunk_rtrim(ch);
}
}
+// Check for a thematic break starting at offset. On failure, return 0 and
+// set parser->thematic_break_kill_pos to the index at which the parse
+// fails. On success, return the match length, counted from offset through
+// the terminating '\r' or '\n'. Rule: "...three or more hyphens, asterisks,
+// or underscores on a line by themselves. If you wish, you may use
+// spaces between the hyphens or asterisks."
+static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input,
+ bufsize_t offset) {
+ bufsize_t i;
+ char c;
+ char nextc = '\0';
+ int count;
+ i = offset;
+ c = peek_at(input, i);
+ if (!(c == '*' || c == '_' || c == '-')) { // the first character must be one of the three marker characters
+ parser->thematic_break_kill_pos = i;
+ return 0;
+ }
+ count = 1;
+ while ((nextc = peek_at(input, ++i))) { // loop also ends when peek_at yields a zero character
+ if (nextc == c) { // only repeats of the same marker character are counted
+ count++;
+ } else if (nextc != ' ' && nextc != '\t') { // spaces and tabs are skipped; anything else stops the scan
+ break;
+ }
+ }
+ if (count >= 3 && (nextc == '\r' || nextc == '\n')) { // needs 3+ markers and the scan must have stopped on a line ending
+ return (i - offset) + 1; // +1 includes the line-ending character at index i
+ } else {
+ parser->thematic_break_kill_pos = i; // record where the scan stopped
+ return 0;
+ }
+}
+
// Find first nonspace character from current offset, setting
// parser->first_nonspace, parser->first_nonspace_column,
// parser->indent, and parser->blank. Does not advance parser->offset.
static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) {
char c;
@@ -1038,10 +1089,11 @@
bool maybe_lazy = S_type(parser->current) == CMARK_NODE_PARAGRAPH;
cmark_node_type cont_type = S_type(*container);
bufsize_t matched = 0;
int lev = 0;
bool save_partially_consumed_tab;
+ bool has_content;
int save_offset;
int save_column;
while (cont_type != CMARK_NODE_CODE_BLOCK &&
cont_type != CMARK_NODE_HTML_BLOCK) {
@@ -1110,17 +1162,24 @@
// note, we don't adjust parser->offset because the tag is part of the
// text
} else if (!indented && cont_type == CMARK_NODE_PARAGRAPH &&
(lev =
scan_setext_heading_line(input, parser->first_nonspace))) {
- (*container)->type = (uint16_t)CMARK_NODE_HEADING;
- (*container)->as.heading.level = lev;
- (*container)->as.heading.setext = true;
- S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
+ // finalize paragraph, resolving reference links
+ has_content = resolve_reference_link_definitions(parser, *container);
+
+ if (has_content) {
+
+ (*container)->type = (uint16_t)CMARK_NODE_HEADING;
+ (*container)->as.heading.level = lev;
+ (*container)->as.heading.setext = true;
+ S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
+ }
} else if (!indented &&
!(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) &&
- (matched = scan_thematic_break(input, parser->first_nonspace))) {
+ (parser->thematic_break_kill_pos <= parser->first_nonspace) &&
+ (matched = S_scan_thematic_break(parser, input, parser->first_nonspace))) {
// it's only now that we know the line is not part of a setext heading:
*container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK,
parser->first_nonspace + 1);
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
} else if (!indented &&
@@ -1375,9 +1434,10 @@
parser->offset = 0;
parser->column = 0;
parser->first_nonspace = 0;
parser->first_nonspace_column = 0;
+ parser->thematic_break_kill_pos = 0;
parser->indent = 0;
parser->blank = false;
parser->partially_consumed_tab = false;
input.data = parser->curline.ptr;