27
27
#define CODE_INDENT 4
28
28
#define TAB_STOP 4
29
29
30
+ /**
31
+ * Very deeply nested lists can cause quadratic performance issues.
32
+ * This constant is used in open_new_blocks() to limit the nesting
33
+ * depth. It is unlikely that a non-contrived markdown document will
34
+ * be nested this deeply.
35
+ */
36
+ #define MAX_LIST_DEPTH 100
37
+
30
38
#ifndef MIN
31
39
/* Smaller of x and y. Every argument use is parenthesized so that compound
 * expressions (e.g. `a | b`, `c ? d : e`) group as the caller intended.
 * Each argument may be evaluated twice; do not pass expressions with side
 * effects. */
#define MIN(x, y) (((x) < (y)) ? (x) : (y))
32
40
#endif
@@ -70,22 +78,6 @@ static void S_parser_feed(cmark_parser *parser, const unsigned char *buffer,
70
78
static void S_process_line (cmark_parser * parser , const unsigned char * buffer ,
71
79
bufsize_t bytes );
72
80
73
- static void subtract_open_block_counts (cmark_parser * parser , cmark_node * node ) {
74
- do {
75
- decr_open_block_count (parser , S_type (node ));
76
- node -> flags &= ~CMARK_NODE__OPEN_BLOCK ;
77
- node = node -> last_child ;
78
- } while (node );
79
- }
80
-
81
- static void add_open_block_counts (cmark_parser * parser , cmark_node * node ) {
82
- do {
83
- incr_open_block_count (parser , S_type (node ));
84
- node -> flags |= CMARK_NODE__OPEN_BLOCK ;
85
- node = node -> last_child ;
86
- } while (node );
87
- }
88
-
89
81
static cmark_node * make_block (cmark_mem * mem , cmark_node_type tag ,
90
82
int start_line , int start_column ) {
91
83
cmark_node * e ;
@@ -145,7 +137,6 @@ static void cmark_parser_reset(cmark_parser *parser) {
145
137
parser -> refmap = cmark_reference_map_new (parser -> mem );
146
138
parser -> root = document ;
147
139
parser -> current = document ;
148
- add_open_block_counts (parser , document );
149
140
150
141
parser -> syntax_extensions = saved_exts ;
151
142
parser -> inline_syntax_extensions = saved_inline_exts ;
@@ -259,18 +250,15 @@ static void remove_trailing_blank_lines(cmark_strbuf *ln) {
259
250
// Check to see if a node ends with a blank line, descending
260
251
// if needed into lists and sublists.
261
252
static bool S_ends_with_blank_line (cmark_node * node ) {
262
- while (true) {
263
- if (S_last_line_checked (node )) {
264
- return (S_last_line_blank (node ));
265
- } else if ((S_type (node ) == CMARK_NODE_LIST ||
266
- S_type (node ) == CMARK_NODE_ITEM ) && node -> last_child ) {
267
- S_set_last_line_checked (node );
268
- node = node -> last_child ;
269
- continue ;
270
- } else {
271
- S_set_last_line_checked (node );
272
- return (S_last_line_blank (node ));
273
- }
253
+ if (S_last_line_checked (node )) {
254
+ return (S_last_line_blank (node ));
255
+ } else if ((S_type (node ) == CMARK_NODE_LIST ||
256
+ S_type (node ) == CMARK_NODE_ITEM ) && node -> last_child ) {
257
+ S_set_last_line_checked (node );
258
+ return (S_ends_with_blank_line (node -> last_child ));
259
+ } else {
260
+ S_set_last_line_checked (node );
261
+ return (S_last_line_blank (node ));
274
262
}
275
263
}
276
264
@@ -330,12 +318,6 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
330
318
has_content = resolve_reference_link_definitions (parser , b );
331
319
if (!has_content ) {
332
320
// remove blank node (former reference def)
333
- if (b -> flags & CMARK_NODE__OPEN_BLOCK ) {
334
- decr_open_block_count (parser , S_type (b ));
335
- if (b -> prev ) {
336
- add_open_block_counts (parser , b -> prev );
337
- }
338
- }
339
321
cmark_node_free (b );
340
322
}
341
323
break ;
@@ -408,17 +390,6 @@ static cmark_node *finalize(cmark_parser *parser, cmark_node *b) {
408
390
return parent ;
409
391
}
410
392
411
- // Recalculates the number of open blocks. Returns true if it matches what's currently stored
412
- // in parser. (Used to check that the counts in parser, which are updated incrementally, are
413
- // correct.)
414
- bool check_open_block_counts (cmark_parser * parser ) {
415
- cmark_parser tmp_parser = {0 }; // Only used for its open_block_counts and total_open_blocks fields.
416
- add_open_block_counts (& tmp_parser , parser -> root );
417
- return
418
- tmp_parser .total_open_blocks == parser -> total_open_blocks &&
419
- memcmp (tmp_parser .open_block_counts , parser -> open_block_counts , sizeof (parser -> open_block_counts )) == 0 ;
420
- }
421
-
422
393
// Add a node as child of another. Return pointer to child.
423
394
static cmark_node * add_child (cmark_parser * parser , cmark_node * parent ,
424
395
cmark_node_type block_type , int start_column ) {
@@ -437,14 +408,11 @@ static cmark_node *add_child(cmark_parser *parser, cmark_node *parent,
437
408
if (parent -> last_child ) {
438
409
parent -> last_child -> next = child ;
439
410
child -> prev = parent -> last_child ;
440
- subtract_open_block_counts (parser , parent -> last_child );
441
411
} else {
442
412
parent -> first_child = child ;
443
413
child -> prev = NULL ;
444
414
}
445
415
parent -> last_child = child ;
446
- add_open_block_counts (parser , child );
447
-
448
416
return child ;
449
417
}
450
418
@@ -1087,14 +1055,8 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
1087
1055
* all_matched = false;
1088
1056
cmark_node * container = parser -> root ;
1089
1057
cmark_node_type cont_type ;
1090
- cmark_parser tmp_parser ; // Only used for its open_block_counts and total_open_blocks fields.
1091
- memcpy (tmp_parser .open_block_counts , parser -> open_block_counts , sizeof (parser -> open_block_counts ));
1092
- tmp_parser .total_open_blocks = parser -> total_open_blocks ;
1093
-
1094
- assert (check_open_block_counts (parser ));
1095
1058
1096
1059
while (S_last_child_is_open (container )) {
1097
- decr_open_block_count (& tmp_parser , S_type (container ));
1098
1060
container = container -> last_child ;
1099
1061
cont_type = S_type (container );
1100
1062
@@ -1106,53 +1068,6 @@ static cmark_node *check_open_blocks(cmark_parser *parser, cmark_chunk *input,
1106
1068
continue ;
1107
1069
}
1108
1070
1109
- // This block of code is a workaround for the quadratic performance
1110
- // issue described here (issue 2):
1111
- //
1112
- // https://github.com/github/cmark-gfm/security/advisories/GHSA-66g8-4hjf-77xh
1113
- //
1114
- // If the current line is empty then we might be able to skip directly
1115
- // to the end of the list of open blocks. To determine whether this is
1116
- // possible, we have been maintaining a count of the number of
1117
- // different types of open blocks. The main criterion is that every
1118
- // remaining block, except the last element of the list, is a LIST or
1119
- // ITEM. The code below checks the conditions, and if they're ok, skips
1120
- // forward to parser->current.
1121
- if (parser -> blank && parser -> indent == 0 ) { // Current line is empty
1122
- // Make sure that parser->current doesn't point to a closed block.
1123
- if (parser -> current -> flags & CMARK_NODE__OPEN_BLOCK ) {
1124
- if (parser -> current -> flags & CMARK_NODE__OPEN ) {
1125
- const size_t n_list = read_open_block_count (& tmp_parser , CMARK_NODE_LIST );
1126
- const size_t n_item = read_open_block_count (& tmp_parser , CMARK_NODE_ITEM );
1127
- // At most one block can be something other than a LIST or ITEM.
1128
- if (n_list + n_item + 1 >= tmp_parser .total_open_blocks ) {
1129
- // Check that parser->current is suitable for jumping to.
1130
- switch (S_type (parser -> current )) {
1131
- case CMARK_NODE_LIST :
1132
- case CMARK_NODE_ITEM :
1133
- if (n_list + n_item != tmp_parser .total_open_blocks ) {
1134
- if (parser -> current -> last_child == NULL ) {
1135
- // There's another node type somewhere in the middle of
1136
- // the list, so don't attempt the optimization.
1137
- break ;
1138
- }
1139
- }
1140
- // fall through
1141
- case CMARK_NODE_CODE_BLOCK :
1142
- case CMARK_NODE_PARAGRAPH :
1143
- case CMARK_NODE_HTML_BLOCK :
1144
- // Jump to parser->current
1145
- container = parser -> current ;
1146
- cont_type = S_type (container );
1147
- break ;
1148
- default :
1149
- break ;
1150
- }
1151
- }
1152
- }
1153
- }
1154
- }
1155
-
1156
1071
switch (cont_type ) {
1157
1072
case CMARK_NODE_BLOCK_QUOTE :
1158
1073
if (!parse_block_quote_prefix (parser , input ))
@@ -1212,10 +1127,11 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1212
1127
bool has_content ;
1213
1128
int save_offset ;
1214
1129
int save_column ;
1130
+ size_t depth = 0 ;
1215
1131
1216
1132
while (cont_type != CMARK_NODE_CODE_BLOCK &&
1217
1133
cont_type != CMARK_NODE_HTML_BLOCK ) {
1218
-
1134
+ depth ++ ;
1219
1135
S_find_first_nonspace (parser , input );
1220
1136
indented = parser -> indent >= CODE_INDENT ;
1221
1137
@@ -1286,9 +1202,8 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1286
1202
has_content = resolve_reference_link_definitions (parser , * container );
1287
1203
1288
1204
if (has_content ) {
1289
- cmark_node_set_type (* container , CMARK_NODE_HEADING );
1290
- decr_open_block_count (parser , CMARK_NODE_PARAGRAPH );
1291
- incr_open_block_count (parser , CMARK_NODE_HEADING );
1205
+
1206
+ (* container )-> type = (uint16_t )CMARK_NODE_HEADING ;
1292
1207
(* container )-> as .heading .level = lev ;
1293
1208
(* container )-> as .heading .setext = true;
1294
1209
S_advance_offset (parser , input , input -> len - 1 - parser -> offset , false);
@@ -1318,6 +1233,7 @@ static void open_new_blocks(cmark_parser *parser, cmark_node **container,
1318
1233
(* container )-> internal_offset = matched ;
1319
1234
} else if ((!indented || cont_type == CMARK_NODE_LIST ) &&
1320
1235
parser -> indent < 4 &&
1236
+ depth < MAX_LIST_DEPTH &&
1321
1237
(matched = parse_list_marker (
1322
1238
parser -> mem , input , parser -> first_nonspace ,
1323
1239
(* container )-> type == CMARK_NODE_PARAGRAPH , & data ))) {
@@ -1443,7 +1359,7 @@ static void add_text_to_container(cmark_parser *parser, cmark_node *container,
1443
1359
S_set_last_line_blank (container , last_line_blank );
1444
1360
1445
1361
tmp = container ;
1446
- while (tmp -> parent && S_last_line_blank ( tmp -> parent ) ) {
1362
+ while (tmp -> parent ) {
1447
1363
S_set_last_line_blank (tmp -> parent , false);
1448
1364
tmp = tmp -> parent ;
1449
1365
}
@@ -1572,7 +1488,6 @@ static void S_process_line(cmark_parser *parser, const unsigned char *buffer,
1572
1488
1573
1489
parser -> line_number ++ ;
1574
1490
1575
- assert (parser -> current -> next == NULL );
1576
1491
last_matched_container = check_open_blocks (parser , & input , & all_matched );
1577
1492
1578
1493
if (!last_matched_container )
0 commit comments