Skip to content

Commit eefdb1f

Browse files
committed
👌 Improve nested emphasis parsing
This fixes quadratic complexity in e.g. `**<...>**a**<...>**`. Implementation of upstream commit: markdown-it/markdown-it@24abaa5
1 parent 4e6dfd5 commit eefdb1f

File tree

5 files changed

+38
-25
lines changed

5 files changed

+38
-25
lines changed

markdown_it/port.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
- package: markdown-it/markdown-it
2-
version: 12.2.0
3-
commit: 6e2de08a0b03d3d0dcc524b89710ce05f83a0283
4-
date: Aug 2, 2021
2+
version: 12.3.0
3+
commit: 2e31d3430187d2eee1ba120c954783eebb93b4e8
4+
date: Dec 9, 2021
55
notes:
66
- Rename variables that use python built-in names, e.g.
77
- `max` -> `maximum`

markdown_it/rules_inline/balance_pairs.py

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,33 @@
66

77
def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
88
"""For each opening emphasis-like marker find a matching closing one."""
9+
if not delimiters:
10+
return
11+
912
openersBottom = {}
1013
maximum = len(delimiters)
1114

15+
# headerIdx is the first delimiter of the current (where closer is) delimiter run
16+
headerIdx = 0
17+
lastTokenIdx = -2 # needs any value lower than -1
18+
jumps: list[int] = []
1219
closerIdx = 0
1320
while closerIdx < maximum:
1421
closer = delimiters[closerIdx]
1522

23+
jumps.append(0)
24+
25+
# markers belong to same delimiter run if:
26+
# - they have adjacent tokens
27+
# - AND markers are the same
28+
#
29+
if (
30+
delimiters[headerIdx].marker != closer.marker
31+
or lastTokenIdx != closer.token - 1
32+
):
33+
headerIdx = closerIdx
34+
lastTokenIdx = closer.token
35+
1636
# Length is only used for emphasis-specific "rule of 3",
1737
# if it's not defined (in strikethrough or 3rd party plugins),
1838
# we can default it to 0 to disable those checks.
@@ -34,20 +54,15 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
3454
(3 if closer.open else 0) + (closer.length % 3)
3555
]
3656

37-
openerIdx = closerIdx - closer.jump - 1
38-
39-
# avoid crash if `closer.jump` is pointing outside of the array,
40-
# e.g. for strikethrough
41-
if openerIdx < -1:
42-
openerIdx = -1
57+
openerIdx = headerIdx - jumps[headerIdx] - 1
4358

4459
newMinOpenerIdx = openerIdx
4560

4661
while openerIdx > minOpenerIdx:
4762
opener = delimiters[openerIdx]
4863

4964
if opener.marker != closer.marker:
50-
openerIdx -= opener.jump + 1
65+
openerIdx -= jumps[openerIdx] + 1
5166
continue
5267

5368
if opener.open and opener.end < 0:
@@ -73,19 +88,25 @@ def processDelimiters(state: StateInline, delimiters: list[Delimiter]) -> None:
7388
# sure algorithm has linear complexity (see *_*_*_*_*_... case).
7489
#
7590
if openerIdx > 0 and not delimiters[openerIdx - 1].open:
76-
lastJump = delimiters[openerIdx - 1].jump + 1
91+
lastJump = jumps[openerIdx - 1] + 1
7792
else:
7893
lastJump = 0
7994

80-
closer.jump = closerIdx - openerIdx + lastJump
95+
jumps[closerIdx] = closerIdx - openerIdx + lastJump
96+
jumps[openerIdx] = lastJump
97+
8198
closer.open = False
8299
opener.end = closerIdx
83-
opener.jump = lastJump
84100
opener.close = False
85101
newMinOpenerIdx = -1
102+
103+
# treat next token as start of run,
104+
# it optimizes skips in **<...>**a**<...>** pathological case
105+
lastTokenIdx = -2
106+
86107
break
87108

88-
openerIdx -= opener.jump + 1
109+
openerIdx -= jumps[openerIdx] + 1
89110

90111
if newMinOpenerIdx != -1:
91112
# If match for this delimiter run failed, we want to set lower bound for

markdown_it/rules_inline/emphasis.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ def tokenize(state: StateInline, silent: bool) -> bool:
2525
Delimiter(
2626
marker=ord(marker),
2727
length=scanned.length,
28-
jump=i,
2928
token=len(state.tokens) - 1,
3029
end=-1,
3130
open=scanned.can_open,
@@ -63,9 +62,11 @@ def _postProcess(state: StateInline, delimiters: list[Delimiter]) -> None:
6362
isStrong = (
6463
i > 0
6564
and delimiters[i - 1].end == startDelim.end + 1
65+
# check that the first two markers match and are adjacent
66+
and delimiters[i - 1].marker == startDelim.marker
6667
and delimiters[i - 1].token == startDelim.token - 1
68+
# check that last two markers are adjacent (we can safely assume they match)
6769
and delimiters[startDelim.end + 1].token == endDelim.token + 1
68-
and delimiters[i - 1].marker == startDelim.marker
6970
)
7071

7172
ch = chr(startDelim.marker)

markdown_it/rules_inline/state_inline.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,6 @@ class Delimiter:
2222
# Total length of these series of delimiters.
2323
length: int
2424

25-
# An amount of characters before this one that's equivalent to
26-
# current one. In plain English: if this delimiter does not open
27-
# an emphasis, neither do previous `jump` characters.
28-
#
29-
# Used to skip sequences like "*****" in one step, for 1st asterisk
30-
# value will be 0, for 2nd it's 1 and so on.
31-
jump: int
32-
3325
# A position of the token this delimiter corresponds to.
3426
token: int
3527

markdown_it/rules_inline/strikethrough.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,6 @@ def tokenize(state: StateInline, silent: bool) -> bool:
3434
Delimiter(
3535
marker=ord(ch),
3636
length=0, # disable "rule of 3" length checks meant for emphasis
37-
jump=i // 2, # for `~~` 1 marker = 2 characters
3837
token=len(state.tokens) - 1,
3938
end=-1,
4039
open=scanned.can_open,

0 commit comments

Comments
 (0)