ext/commonmarker/blocks.c in commonmarker-0.19.0 vs ext/commonmarker/blocks.c in commonmarker-0.20.0

- old
+ new

@@ -34,10 +34,14 @@ static bool S_last_line_blank(const cmark_node *node) { return (node->flags & CMARK_NODE__LAST_LINE_BLANK) != 0; } +static bool S_last_line_checked(const cmark_node *node) { + return (node->flags & CMARK_NODE__LAST_LINE_CHECKED) != 0; +} + static CMARK_INLINE cmark_node_type S_type(const cmark_node *node) { return (cmark_node_type)node->type; } static void S_set_last_line_blank(cmark_node *node, bool is_blank) { @@ -45,10 +49,14 @@ node->flags |= CMARK_NODE__LAST_LINE_BLANK; else node->flags &= ~CMARK_NODE__LAST_LINE_BLANK; } +static void S_set_last_line_checked(cmark_node *node) { + node->flags |= CMARK_NODE__LAST_LINE_CHECKED; +} + static CMARK_INLINE bool S_is_line_end_char(char c) { return (c == '\n' || c == '\r'); } static CMARK_INLINE bool S_is_space_or_tab(char c) { @@ -119,12 +127,10 @@ parser->refmap = cmark_reference_map_new(parser->mem); parser->root = document; parser->current = document; - parser->last_buffer_ended_with_cr = false; - parser->syntax_extensions = saved_exts; parser->inline_syntax_extensions = saved_inline_exts; parser->options = saved_options; } @@ -232,30 +238,47 @@ } } // Check to see if a node ends with a blank line, descending // if needed into lists and sublists. -static bool ends_with_blank_line(cmark_node *node) { - cmark_node *cur = node; - while (cur != NULL) { - if (S_last_line_blank(cur)) { - return true; - } - if (S_type(cur) == CMARK_NODE_LIST || S_type(cur) == CMARK_NODE_ITEM) { - cur = cur->last_child; - } else { - cur = NULL; - } +static bool S_ends_with_blank_line(cmark_node *node) { + if (S_last_line_checked(node)) { + return(S_last_line_blank(node)); + } else if ((S_type(node) == CMARK_NODE_LIST || + S_type(node) == CMARK_NODE_ITEM) && node->last_child) { + S_set_last_line_checked(node); + return(S_ends_with_blank_line(node->last_child)); + } else { + S_set_last_line_checked(node); + return (S_last_line_blank(node)); } - return false; } +// returns true if content remains after link defs are resolved. +static bool resolve_reference_link_definitions( + cmark_parser *parser, + cmark_node *b) { + bufsize_t pos; + cmark_strbuf *node_content = &b->content; + cmark_chunk chunk = {node_content->ptr, node_content->size, 0}; + while (chunk.len && chunk.data[0] == '[' && + (pos = cmark_parse_reference_inline(parser->mem, &chunk, + parser->refmap))) { + + chunk.data += pos; + chunk.len -= pos; + } + cmark_strbuf_drop(node_content, (node_content->size - chunk.len)); + return !is_blank(&b->content, 0); +} + static cmark_node *finalize(cmark_parser *parser, cmark_node *b) { bufsize_t pos; cmark_node *item; cmark_node *subitem; cmark_node *parent; + bool has_content; parent = b->parent; assert(b->flags & CMARK_NODE__OPEN); // shouldn't call finalize on closed blocks b->flags &= ~CMARK_NODE__OPEN; @@ -281,19 +304,12 @@ cmark_strbuf *node_content = &b->content; switch (S_type(b)) { case CMARK_NODE_PARAGRAPH: { - cmark_chunk chunk = {node_content->ptr, node_content->size, 0}; - while (chunk.len && chunk.data[0] == '[' && - (pos = cmark_parse_reference_inline(parser->mem, &chunk, parser->refmap))) { - - chunk.data += pos; - chunk.len -= pos; - } - cmark_strbuf_drop(node_content, (node_content->size - chunk.len)); - if (is_blank(node_content, 0)) { + has_content = resolve_reference_link_definitions(parser, b); + if (!has_content) { // remove blank node (former reference def) cmark_node_free(b); } break; } @@ -341,11 +357,12 @@ } // recurse into children of list item, to see if there are // spaces between them: subitem = item->first_child; while (subitem) { - if (ends_with_blank_line(subitem) && (item->next || subitem->next)) { + if ((item->next || subitem->next) && + S_ends_with_blank_line(subitem)) { b->as.list.tight = false; break; } subitem = subitem->next; } @@ -746,10 +763,44 @@ ch->len = n; cmark_chunk_rtrim(ch); } } +// Check for thematic break. On failure, return 0 and update +// thematic_break_kill_pos with the index at which the +// parse fails. On success, return length of match. +// "...three or more hyphens, asterisks, +// or underscores on a line by themselves. If you wish, you may use +// spaces between the hyphens or asterisks." +static int S_scan_thematic_break(cmark_parser *parser, cmark_chunk *input, + bufsize_t offset) { + bufsize_t i; + char c; + char nextc = '\0'; + int count; + i = offset; + c = peek_at(input, i); + if (!(c == '*' || c == '_' || c == '-')) { + parser->thematic_break_kill_pos = i; + return 0; + } + count = 1; + while ((nextc = peek_at(input, ++i))) { + if (nextc == c) { + count++; + } else if (nextc != ' ' && nextc != '\t') { + break; + } + } + if (count >= 3 && (nextc == '\r' || nextc == '\n')) { + return (i - offset) + 1; + } else { + parser->thematic_break_kill_pos = i; + return 0; + } +} + // Find first nonspace character from current offset, setting // parser->first_nonspace, parser->first_nonspace_column, // parser->indent, and parser->blank. Does not advance parser->offset. static void S_find_first_nonspace(cmark_parser *parser, cmark_chunk *input) { char c; @@ -1038,10 +1089,11 @@ bool maybe_lazy = S_type(parser->current) == CMARK_NODE_PARAGRAPH; cmark_node_type cont_type = S_type(*container); bufsize_t matched = 0; int lev = 0; bool save_partially_consumed_tab; + bool has_content; int save_offset; int save_column; while (cont_type != CMARK_NODE_CODE_BLOCK && cont_type != CMARK_NODE_HTML_BLOCK) { @@ -1110,17 +1162,24 @@ // note, we don't adjust parser->offset because the tag is part of the // text } else if (!indented && cont_type == CMARK_NODE_PARAGRAPH && (lev = scan_setext_heading_line(input, parser->first_nonspace))) { - (*container)->type = (uint16_t)CMARK_NODE_HEADING; - (*container)->as.heading.level = lev; - (*container)->as.heading.setext = true; - S_advance_offset(parser, input, input->len - 1 - parser->offset, false); + // finalize paragraph, resolving reference links + has_content = resolve_reference_link_definitions(parser, *container); + + if (has_content) { + + (*container)->type = (uint16_t)CMARK_NODE_HEADING; + (*container)->as.heading.level = lev; + (*container)->as.heading.setext = true; + S_advance_offset(parser, input, input->len - 1 - parser->offset, false); + } } else if (!indented && !(cont_type == CMARK_NODE_PARAGRAPH && !all_matched) && - (matched = scan_thematic_break(input, parser->first_nonspace))) { + (parser->thematic_break_kill_pos <= parser->first_nonspace) && + (matched = S_scan_thematic_break(parser, input, parser->first_nonspace))) { // it's only now that we know the line is not part of a setext heading: *container = add_child(parser, *container, CMARK_NODE_THEMATIC_BREAK, parser->first_nonspace + 1); S_advance_offset(parser, input, input->len - 1 - parser->offset, false); } else if (!indented && @@ -1375,9 +1434,10 @@ parser->offset = 0; parser->column = 0; parser->first_nonspace = 0; parser->first_nonspace_column = 0; + parser->thematic_break_kill_pos = 0; parser->indent = 0; parser->blank = false; parser->partially_consumed_tab = false; input.data = parser->curline.ptr;