Skip to content

Commit 2eb8ca8

Browse files
committed
Update cmark-upstream to github/cmark-gfm@c8dcdc7
1 parent bbb49db commit 2eb8ca8

13 files changed

+80
-183
lines changed

ext/commonmarker/blocks.c

+23-108
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,14 @@
2727
#define CODE_INDENT 4
2828
#define TAB_STOP 4
2929

30+
/**
31+
* Very deeply nested lists can cause quadratic performance issues.
32+
* This constant is used in open_new_blocks() to limit the nesting
33+
* depth. It is unlikely that a non-contrived markdown document will
34+
* be nested this deeply.
35+
*/
36+
#define MAX_LIST_DEPTH 100
37+
3038
#ifndef MIN
/* Evaluates to the smaller of x and y.
 *
 * Every use of an argument is parenthesized so that operands containing
 * operators of lower precedence than '<' or '?:' (for example `a | b`)
 * expand as the caller intended. Each argument can still be evaluated more
 * than once, so do not pass expressions with side effects. */
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
#endif
@@ -70,22 +78,6 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
7078
static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
7179
bufsize_t bytes);
7280

73-
static void subtract_open_block_counts(cmark_parser *parser, cmark_node *node) {
74-
do {
75-
decr_open_block_count(parser, S_type(node));
76-
node->flags &= ~CMARK_NODE__OPEN_BLOCK;
77-
node = node->last_child;
78-
} while (node);
79-
}
80-
81-
static void add_open_block_counts(cmark_parser *parser, cmark_node *node) {
82-
do {
83-
incr_open_block_count(parser, S_type(node));
84-
node->flags |= CMARK_NODE__OPEN_BLOCK;
85-
node = node->last_child;
86-
} while (node);
87-
}
88-
8981
static cmark_node *make_block(cmark_mem *mem, cmark_node_type tag,
9082
int start_line, int start_column) {
9183
cmark_node *e;
@@ -145,7 +137,6 @@ static void cmark_parser_reset(cmark_parser *parser) {
145137
parser->refmap = cmark_reference_map_new(parser->mem);
146138
parser->root = document;
147139
parser->current = document;
148-
add_open_block_counts(parser, document);
149140

150141
parser->syntax_extensions = saved_exts;
151142
parser->inline_syntax_extensions = saved_inline_exts;
@@ -259,18 +250,15 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {
259250
// Check to see if a node ends with a blank line, descending
260251
// if needed into lists and sublists.
261252
static bool S_ends_with_blank_line(cmark_node *node) {
262-
while (true) {
263-
if (S_last_line_checked(node)) {
264-
return(S_last_line_blank(node));
265-
} else if ((S_type(node) == CMARK_NODE_LIST ||
266-
S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
267-
S_set_last_line_checked(node);
268-
node = node->last_child;
269-
continue;
270-
} else {
271-
S_set_last_line_checked(node);
272-
return (S_last_line_blank(node));
273-
}
253+
if (S_last_line_checked(node)) {
254+
return(S_last_line_blank(node));
255+
} else if ((S_type(node) == CMARK_NODE_LIST ||
256+
S_type(node) == CMARK_NODE_ITEM) && node->last_child) {
257+
S_set_last_line_checked(node);
258+
return(S_ends_with_blank_line(node->last_child));
259+
} else {
260+
S_set_last_line_checked(node);
261+
return (S_last_line_blank(node));
274262
}
275263
}
276264

@@ -330,12 +318,6 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
330318
has_content = resolve_reference_link_definitions(parser, b);
331319
if (!has_content) {
332320
// remove blank node (former reference def)
333-
if (b->flags & CMARK_NODE__OPEN_BLOCK) {
334-
decr_open_block_count(parser, S_type(b));
335-
if (b->prev) {
336-
add_open_block_counts(parser, b->prev);
337-
}
338-
}
339321
cmark_node_free(b);
340322
}
341323
break;
@@ -408,17 +390,6 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
408390
return parent;
409391
}
410392

411-
// Recalculates the number of open blocks. Returns true if it matches what's currently stored
412-
// in parser. (Used to check that the counts in parser, which are updated incrementally, are
413-
// correct.)
414-
bool check_open_block_counts(cmark_parser *parser) {
415-
cmark_parser tmp_parser = {0}; // Only used for its open_block_counts and total_open_blocks fields.
416-
add_open_block_counts(&tmp_parser, parser->root);
417-
return
418-
tmp_parser.total_open_blocks == parser->total_open_blocks &&
419-
memcmp(tmp_parser.open_block_counts, parser->open_block_counts, sizeof(parser->open_block_counts)) == 0;
420-
}
421-
422393
// Add a node as child of another. Return pointer to child.
423394
static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
424395
cmark_node_type block_type, int start_column) {
@@ -437,14 +408,11 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
437408
if (parent->last_child) {
438409
parent->last_child->next = child;
439410
child->prev = parent->last_child;
440-
subtract_open_block_counts(parser, parent->last_child);
441411
} else {
442412
parent->first_child = child;
443413
child->prev = NULL;
444414
}
445415
parent->last_child = child;
446-
add_open_block_counts(parser, child);
447-
448416
return child;
449417
}
450418

@@ -1087,14 +1055,8 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
10871055
*all_matched = false;
10881056
cmark_node *container = parser->root;
10891057
cmark_node_type cont_type;
1090-
cmark_parser tmp_parser; // Only used for its open_block_counts and total_open_blocks fields.
1091-
memcpy(tmp_parser.open_block_counts, parser->open_block_counts, sizeof(parser->open_block_counts));
1092-
tmp_parser.total_open_blocks = parser->total_open_blocks;
1093-
1094-
assert(check_open_block_counts(parser));
10951058

10961059
while (S_last_child_is_open(container)) {
1097-
decr_open_block_count(&tmp_parser, S_type(container));
10981060
container = container->last_child;
10991061
cont_type = S_type(container);
11001062

@@ -1106,53 +1068,6 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
11061068
continue;
11071069
}
11081070

1109-
// This block of code is a workaround for the quadratic performance
1110-
// issue described here (issue 2):
1111-
//
1112-
// https://github.com/github/cmark-gfm/security/advisories/GHSA-66g8-4hjf-77xh
1113-
//
1114-
// If the current line is empty then we might be able to skip directly
1115-
// to the end of the list of open blocks. To determine whether this is
1116-
// possible, we have been maintaining a count of the number of
1117-
// different types of open blocks. The main criterion is that every
1118-
// remaining block, except the last element of the list, is a LIST or
1119-
// ITEM. The code below checks the conditions, and if they're ok, skips
1120-
// forward to parser->current.
1121-
if (parser->blank && parser->indent == 0) { // Current line is empty
1122-
// Make sure that parser->current doesn't point to a closed block.
1123-
if (parser->current->flags & CMARK_NODE__OPEN_BLOCK) {
1124-
if (parser->current->flags & CMARK_NODE__OPEN) {
1125-
const size_t n_list = read_open_block_count(&tmp_parser, CMARK_NODE_LIST);
1126-
const size_t n_item = read_open_block_count(&tmp_parser, CMARK_NODE_ITEM);
1127-
// At most one block can be something other than a LIST or ITEM.
1128-
if (n_list + n_item + 1 >= tmp_parser.total_open_blocks) {
1129-
// Check that parser->current is suitable for jumping to.
1130-
switch (S_type(parser->current)) {
1131-
case CMARK_NODE_LIST:
1132-
case CMARK_NODE_ITEM:
1133-
if (n_list + n_item != tmp_parser.total_open_blocks) {
1134-
if (parser->current->last_child == NULL) {
1135-
// There's another node type somewhere in the middle of
1136-
// the list, so don't attempt the optimization.
1137-
break;
1138-
}
1139-
}
1140-
// fall through
1141-
case CMARK_NODE_CODE_BLOCK:
1142-
case CMARK_NODE_PARAGRAPH:
1143-
case CMARK_NODE_HTML_BLOCK:
1144-
// Jump to parser->current
1145-
container = parser->current;
1146-
cont_type = S_type(container);
1147-
break;
1148-
default:
1149-
break;
1150-
}
1151-
}
1152-
}
1153-
}
1154-
}
1155-
11561071
switch (cont_type) {
11571072
case CMARK_NODE_BLOCK_QUOTE:
11581073
if (!parse_block_quote_prefix(parser, input))
@@ -1212,10 +1127,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
12121127
bool has_content;
12131128
int save_offset;
12141129
int save_column;
1130+
size_t depth = 0;
12151131

12161132
while (cont_type != CMARK_NODE_CODE_BLOCK &&
12171133
cont_type != CMARK_NODE_HTML_BLOCK) {
1218-
1134+
depth++;
12191135
S_find_first_nonspace(parser, input);
12201136
indented = parser->indent >= CODE_INDENT;
12211137

@@ -1286,9 +1202,8 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
12861202
has_content = resolve_reference_link_definitions(parser, *container);
12871203

12881204
if (has_content) {
1289-
cmark_node_set_type(*container, CMARK_NODE_HEADING);
1290-
decr_open_block_count(parser, CMARK_NODE_PARAGRAPH);
1291-
incr_open_block_count(parser, CMARK_NODE_HEADING);
1205+
1206+
(*container)->type = (uint16_t)CMARK_NODE_HEADING;
12921207
(*container)->as.heading.level = lev;
12931208
(*container)->as.heading.setext = true;
12941209
S_advance_offset(parser, input, input->len - 1 - parser->offset, false);
@@ -1318,6 +1233,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
13181233
(*container)->internal_offset = matched;
13191234
} else if ((!indented || cont_type == CMARK_NODE_LIST) &&
13201235
parser->indent < 4 &&
1236+
depth < MAX_LIST_DEPTH &&
13211237
(matched = parse_list_marker(
13221238
parser->mem, input, parser->first_nonspace,
13231239
(*container)->type == CMARK_NODE_PARAGRAPH, &data))) {
@@ -1443,7 +1359,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
14431359
S_set_last_line_blank(container, last_line_blank);
14441360

14451361
tmp = container;
1446-
while (tmp->parent && S_last_line_blank(tmp->parent)) {
1362+
while (tmp->parent) {
14471363
S_set_last_line_blank(tmp->parent, false);
14481364
tmp = tmp->parent;
14491365
}
@@ -1572,7 +1488,6 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
15721488

15731489
parser->line_number++;
15741490

1575-
assert(parser->current->next == NULL);
15761491
last_matched_container = check_open_blocks(parser, &input, &all_matched);
15771492

15781493
if (!last_matched_container)

ext/commonmarker/cmark-gfm.h

+11-10
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,6 @@ char *cmark_markdown_to_html(const char *text, size_t len, int options);
3737
#define CMARK_NODE_TYPE_MASK (0xc000)
3838
#define CMARK_NODE_VALUE_MASK (0x3fff)
3939

40-
/**
41-
* This is the maximum number of block types (CMARK_NODE_DOCUMENT,
42-
* CMARK_NODE_HEADING, ...). It needs to be bigger than the number of
43-
* hardcoded block types (below) to allow for extensions (see
44-
* cmark_syntax_extension_add_node). But it also determines the size of the
45-
* open_block_counts array in the cmark_parser struct, so we don't want it
46-
* to be excessively large.
47-
*/
48-
#define CMARK_NODE_TYPE_BLOCK_LIMIT 0x20
49-
5040
typedef enum {
5141
/* Error status */
5242
CMARK_NODE_NONE = 0x0000,
@@ -423,6 +413,17 @@ CMARK_GFM_EXPORT int cmark_node_get_list_tight(cmark_node *node);
423413
*/
424414
CMARK_GFM_EXPORT int cmark_node_set_list_tight(cmark_node *node, int tight);
425415

416+
/**
417+
* Returns item index of 'node'. This is only used when rendering output
418+
* formats such as commonmark, which need to output the index. It is not
419+
* required for formats such as html or latex.
420+
*/
421+
CMARK_GFM_EXPORT int cmark_node_get_item_index(cmark_node *node);
422+
423+
/** Sets item index of 'node'. Returns 1 on success, 0 on failure.
424+
*/
425+
CMARK_GFM_EXPORT int cmark_node_set_item_index(cmark_node *node, int idx);
426+
426427
/** Returns the info string from a fenced code block.
427428
*/
428429
CMARK_GFM_EXPORT const char *cmark_node_get_fence_info(cmark_node *node);

ext/commonmarker/commonmark.c

+1-2
Original file line numberDiff line numberDiff line change
@@ -216,14 +216,13 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
216216
LIT("<!-- end list -->");
217217
BLANKLINE();
218218
}
219-
renderer->list_number = cmark_node_get_list_start(node);
220219
break;
221220

222221
case CMARK_NODE_ITEM:
223222
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
224223
marker_width = 4;
225224
} else {
226-
list_number = renderer->list_number++;
225+
list_number = cmark_node_get_item_index(node);
227226
list_delim = cmark_node_get_list_delim(node->parent);
228227
// we ensure a width of at least 4 so
229228
// we get nice transition from single digits

ext/commonmarker/man.c

+1-2
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,6 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
113113
break;
114114

115115
case CMARK_NODE_LIST:
116-
renderer->list_number = cmark_node_get_list_start(node);
117116
break;
118117

119118
case CMARK_NODE_ITEM:
@@ -123,7 +122,7 @@ static int S_render_node(cmark_renderer *renderer, cmark_node *node,
123122
if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
124123
LIT("\\[bu] 2");
125124
} else {
126-
list_number = renderer->list_number++;
125+
list_number = cmark_node_get_item_index(node);
127126
char list_number_s[LIST_NUMBER_SIZE];
128127
snprintf(list_number_s, LIST_NUMBER_SIZE, "\"%d.\" 4", list_number);
129128
LIT(list_number_s);

ext/commonmarker/node.c

+26-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ void cmark_register_node_flag(cmark_node_internal_flags *flags) {
3939
nextflag <<= 1;
4040
}
4141

42-
void cmark_init_standard_node_flags() {}
42+
/* Intentionally a no-op: takes no arguments and performs no work. */
void cmark_init_standard_node_flags(void) {
}
4343

4444
bool cmark_node_can_contain_type(cmark_node *node, cmark_node_type child_type) {
4545
if (child_type == CMARK_NODE_DOCUMENT) {
@@ -564,6 +564,31 @@ int cmark_node_set_list_tight(cmark_node *node, int tight) {
564564
}
565565
}
566566

567+
/* Reads the index stored on a list item.
 *
 * Returns node->as.list.start when 'node' is non-NULL and its type is
 * CMARK_NODE_ITEM; returns 0 for a NULL node or any other node type. */
int cmark_node_get_item_index(cmark_node *node) {
  if (node != NULL && node->type == CMARK_NODE_ITEM) {
    return node->as.list.start;
  }

  return 0;
}
578+
579+
/* Stores 'idx' as the index of a list item.
 *
 * Returns 1 after writing node->as.list.start. Returns 0, leaving the node
 * untouched, when 'node' is NULL, 'idx' is negative, or the node's type is
 * not CMARK_NODE_ITEM. */
int cmark_node_set_item_index(cmark_node *node, int idx) {
  /* NULL is tested first so the type is never read through a NULL pointer. */
  if (node == NULL || idx < 0 || node->type != CMARK_NODE_ITEM) {
    return 0;
  }

  node->as.list.start = idx;
  return 1;
}
}
591+
567592
const char *cmark_node_get_fence_info(cmark_node *node) {
568593
if (node == NULL) {
569594
return NULL;

ext/commonmarker/node.h

+4-5
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,12 @@ typedef struct {
5050

5151
enum cmark_node__internal_flags {
5252
CMARK_NODE__OPEN = (1 << 0),
53-
CMARK_NODE__OPEN_BLOCK = (1 << 1),
54-
CMARK_NODE__LAST_LINE_BLANK = (1 << 2),
55-
CMARK_NODE__LAST_LINE_CHECKED = (1 << 3),
53+
CMARK_NODE__LAST_LINE_BLANK = (1 << 1),
54+
CMARK_NODE__LAST_LINE_CHECKED = (1 << 2),
5655

5756
// Extensions can register custom flags by calling `cmark_register_node_flag`.
5857
// This is the starting value for the custom flags.
59-
CMARK_NODE__REGISTER_FIRST = (1 << 4),
58+
CMARK_NODE__REGISTER_FIRST = (1 << 3),
6059
};
6160

6261
typedef uint16_t cmark_node_internal_flags;
@@ -128,7 +127,7 @@ void cmark_register_node_flag(cmark_node_internal_flags *flags);
128127
* library. It is now a no-op.
129128
*/
130129
CMARK_GFM_EXPORT
131-
void cmark_init_standard_node_flags();
130+
void cmark_init_standard_node_flags(void);
132131

133132
static CMARK_INLINE cmark_mem *cmark_node_mem(cmark_node *node) {
134133
return node->content.mem;

0 commit comments

Comments
 (0)