ext/markly/table.c in markly-0.7.0 vs ext/markly/table.c in markly-0.8.0

- old
+ new

@@ -9,17 +9,25 @@ #include "ext_scanners.h" #include "strikethrough.h" #include "table.h" #include "cmark-gfm-core-extensions.h" +// Custom node flag, initialized in `create_table_extension`. +static cmark_node_internal_flags CMARK_NODE__TABLE_VISITED; + cmark_node_type CMARK_NODE_TABLE, CMARK_NODE_TABLE_ROW, CMARK_NODE_TABLE_CELL; typedef struct { + cmark_strbuf *buf; + int start_offset, end_offset, internal_offset; +} node_cell; + +typedef struct { uint16_t n_columns; int paragraph_offset; - cmark_llist *cells; + node_cell *cells; } table_row; typedef struct { uint16_t n_columns; uint8_t *alignments; @@ -27,28 +35,28 @@ typedef struct { bool is_header; } node_table_row; -typedef struct { - cmark_strbuf *buf; - int start_offset, end_offset, internal_offset; -} node_cell; - -static void free_table_cell(cmark_mem *mem, void *data) { - node_cell *cell = (node_cell *)data; +static void free_table_cell(cmark_mem *mem, node_cell *cell) { cmark_strbuf_free((cmark_strbuf *)cell->buf); mem->free(cell->buf); - mem->free(cell); } +static void free_row_cells(cmark_mem *mem, table_row *row) { + while (row->n_columns > 0) { + free_table_cell(mem, &row->cells[--row->n_columns]); + } + mem->free(row->cells); + row->cells = NULL; +} + static void free_table_row(cmark_mem *mem, table_row *row) { if (!row) return; - cmark_llist_free_full(mem, row->cells, (cmark_free_func)free_table_cell); - + free_row_cells(mem, row); mem->free(row); } static void free_node_table(cmark_mem *mem, void *ptr) { node_table *t = (node_table *)ptr; @@ -109,10 +117,28 @@ cmark_strbuf_truncate(res, w); return res; } +// Adds a new cell to the end of the row. A pointer to the new cell is returned +// for the caller to initialize. +static node_cell* append_row_cell(cmark_mem *mem, table_row *row) { + const uint32_t n_columns = row->n_columns + 1; + // realloc when n_columns is a power of 2 + if ((n_columns & (n_columns-1)) == 0) { + // make sure we never wrap row->n_columns + // offset will != len and our exit will clean up as intended + if (n_columns > UINT16_MAX) { + return NULL; + } + // Use realloc to double the size of the buffer. + row->cells = (node_cell *)mem->realloc(row->cells, (2 * n_columns - 1) * sizeof(node_cell)); + } + row->n_columns = (uint16_t)n_columns; + return &row->cells[n_columns-1]; +} + static table_row *row_from_string(cmark_syntax_extension *self, cmark_parser *parser, unsigned char *string, int len) { // Parses a single table row. It has the following form: // `delim? table_cell (delim table_cell)* delim? newline` @@ -127,10 +153,11 @@ table_row *row = NULL; bufsize_t cell_matched = 1, pipe_matched = 1, offset; int expect_more_cells = 1; int row_end_offset = 0; + int int_overflow_abort = 0; row = (table_row *)parser->mem->calloc(1, sizeof(table_row)); row->n_columns = 0; row->cells = NULL; @@ -149,22 +176,26 @@ // pipe. cmark_strbuf *cell_buf = unescape_pipes(parser->mem, string + offset, cell_matched); cmark_strbuf_trim(cell_buf); - node_cell *cell = (node_cell *)parser->mem->calloc(1, sizeof(*cell)); + node_cell *cell = append_row_cell(parser->mem, row); + if (!cell) { + int_overflow_abort = 1; + cmark_strbuf_free(cell_buf); + parser->mem->free(cell_buf); + break; + } cell->buf = cell_buf; cell->start_offset = offset; cell->end_offset = offset + cell_matched - 1; + cell->internal_offset = 0; - while (cell->start_offset > 0 && string[cell->start_offset - 1] != '|') { + while (cell->start_offset > row->paragraph_offset && string[cell->start_offset - 1] != '|') { --cell->start_offset; ++cell->internal_offset; } - - row->n_columns += 1; - row->cells = cmark_llist_append(parser->mem, row->cells, cell); } offset += cell_matched + pipe_matched; if (pipe_matched) { @@ -178,13 +209,11 @@ // the row is not a real row but potentially part of the paragraph // preceding the table. if (row_end_offset && offset != len) { row->paragraph_offset = offset; - cmark_llist_free_full(parser->mem, row->cells, (cmark_free_func)free_table_cell); - row->cells = NULL; - row->n_columns = 0; + free_row_cells(parser->mem, row); // Scan past the (optional) leading pipe. offset += scan_table_cell_end(string, len, offset); expect_more_cells = 1; @@ -192,11 +221,11 @@ expect_more_cells = 0; } } } - if (offset != len || row->n_columns == 0) { + if (offset != len || row->n_columns == 0 || int_overflow_abort) { free_table_row(parser->mem, row); row = NULL; } return row; @@ -231,18 +260,27 @@ table_row *marker_row = NULL; node_table_row *ntr; const char *parent_string; uint16_t i; + if (parent_container->flags & CMARK_NODE__TABLE_VISITED) { + return parent_container; + } + if (!scan_table_start(input, len, cmark_parser_get_first_nonspace(parser))) { return parent_container; } // Since scan_table_start was successful, we must have a marker row. marker_row = row_from_string(self, parser, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); + // assert may be optimized out, don't rely on it for security boundaries + if (!marker_row) { + return parent_container; + } + assert(marker_row); cmark_arena_push(); // Check for a matching header row. We call `row_from_string` with the entire @@ -253,19 +291,26 @@ (int)strlen(parent_string)); if (!header_row || header_row->n_columns != marker_row->n_columns) { free_table_row(parser->mem, marker_row); free_table_row(parser->mem, header_row); cmark_arena_pop(); + parent_container->flags |= CMARK_NODE__TABLE_VISITED; return parent_container; } if (cmark_arena_pop()) { marker_row = row_from_string( self, parser, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); header_row = row_from_string(self, parser, (unsigned char *)parent_string, (int)strlen(parent_string)); + // row_from_string can return NULL, add additional check to ensure n_columns match + if (!marker_row || !header_row || header_row->n_columns != marker_row->n_columns) { + free_table_row(parser->mem, marker_row); + free_table_row(parser->mem, header_row); + return parent_container; + } } if (!cmark_node_set_type(parent_container, CMARK_NODE_TABLE)) { free_table_row(parser->mem, header_row); free_table_row(parser->mem, marker_row); @@ -279,15 +324,16 @@ cmark_node_set_syntax_extension(parent_container, self); parent_container->as.opaque = parser->mem->calloc(1, sizeof(node_table)); set_n_table_columns(parent_container, header_row->n_columns); + // allocate alignments based on marker_row->n_columns + // since we populate the alignments array based on marker_row->cells uint8_t *alignments = - (uint8_t *)parser->mem->calloc(header_row->n_columns, sizeof(uint8_t)); - cmark_llist *it = marker_row->cells; - for (i = 0; it; it = it->next, ++i) { - node_cell *node = (node_cell *)it->data; + (uint8_t *)parser->mem->calloc(marker_row->n_columns, sizeof(uint8_t)); + for (i = 0; i < marker_row->n_columns; ++i) { + node_cell *node = &marker_row->cells[i]; bool left = node->buf->ptr[0] == ':', right = node->buf->ptr[node->buf->size - 1] == ':'; if (left && right) alignments[i] = 'c'; else if (left) @@ -306,14 +352,12 @@ table_header->as.opaque = ntr = (node_table_row *)parser->mem->calloc(1, sizeof(node_table_row)); ntr->is_header = true; { - cmark_llist *tmp; - - for (tmp = header_row->cells; tmp; tmp = tmp->next) { - node_cell *cell = (node_cell *) tmp->data; + for (i = 0; i < header_row->n_columns; ++i) { + node_cell *cell = &header_row->cells[i]; cmark_node *header_cell = cmark_parser_add_child(parser, table_header, CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset); header_cell->start_line = header_cell->end_line = parent_container->start_line; header_cell->internal_offset = cell->internal_offset; header_cell->end_column = parent_container->start_column + cell->end_offset; @@ -349,16 +393,21 @@ table_row_block->as.opaque = parser->mem->calloc(1, sizeof(node_table_row)); row = row_from_string(self, parser, input + cmark_parser_get_first_nonspace(parser), len - cmark_parser_get_first_nonspace(parser)); + if (!row) { + // clean up the dangling node + cmark_node_free(table_row_block); + return NULL; + } + { - cmark_llist *tmp; int i, table_columns = get_n_table_columns(parent_container); - for (tmp = row->cells, i = 0; tmp && i < table_columns; tmp = tmp->next, ++i) { - node_cell *cell = (node_cell *) tmp->data; + for (i = 0; i < row->n_columns && i < table_columns; ++i) { + node_cell *cell = &row->cells[i]; cmark_node *node = cmark_parser_add_child(parser, table_row_block, CMARK_NODE_TABLE_CELL, parent_container->start_column + cell->start_offset); node->internal_offset = cell->internal_offset; node->end_column = parent_container->start_column + cell->end_offset; cmark_node_set_string_content(node, (char *) cell->buf->ptr); @@ -757,9 +806,10 @@ } cmark_syntax_extension *create_table_extension(void) { cmark_syntax_extension *self = cmark_syntax_extension_new("table"); + cmark_register_node_flag(&CMARK_NODE__TABLE_VISITED); cmark_syntax_extension_set_match_block_func(self, matches); cmark_syntax_extension_set_open_block_func(self, try_opening_table_block); cmark_syntax_extension_set_get_type_string_func(self, get_type_string); cmark_syntax_extension_set_can_contain_func(self, can_contain); cmark_syntax_extension_set_contains_inlines_func(self, contains_inlines);