Skip to content

Commit 43f956b

Browse files
sildar and chrisjsewell authored
👌 IMPROVE: Parsing performance (#32)
Character codes (ordinals) of `state.src` are now pre-computed, to reduce redundant computations.
Co-authored-by: Chris Sewell <[email protected]>
1 parent 4a7ac78 commit 43f956b

35 files changed

+166
-134
lines changed

.pre-commit-config.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ exclude: >
1414
1515
repos:
1616

17-
- repo: git://github.com/pre-commit/pre-commit-hooks
17+
- repo: https://github.com/pre-commit/pre-commit-hooks
1818
rev: v2.2.3
1919
hooks:
2020
- id: check-json

markdown_it/extensions/container/index.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def container_func(state: StateBlock, startLine: int, endLine: int, silent: bool
3636

3737
# Check out the first character quickly,
3838
# this should filter out most of non-containers
39-
if marker_char != charCodeAt(state.src, start):
39+
if marker_char != state.srcCharCode[start]:
4040
return False
4141

4242
# Check out the rest of the marker string
@@ -79,7 +79,7 @@ def container_func(state: StateBlock, startLine: int, endLine: int, silent: bool
7979
# test
8080
break
8181

82-
if marker_char != charCodeAt(state.src, start):
82+
if marker_char != state.srcCharCode[start]:
8383
continue
8484

8585
if state.sCount[nextLine] - state.blkIndent >= 4:

markdown_it/extensions/deflist/index.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
"""Process definition lists."""
22
from markdown_it import MarkdownIt
3-
from markdown_it.common.utils import charCodeAt
43
from markdown_it.rules_block import StateBlock
54

65

@@ -16,7 +15,7 @@ def skipMarker(state: StateBlock, line: int):
1615
return -1
1716

1817
# Check bullet
19-
marker = charCodeAt(state.src, start)
18+
marker = state.srcCharCode[start]
2019
start += 1
2120
if marker != 0x7E and marker != 0x3A: # ~ :
2221
return -1
@@ -119,7 +118,7 @@ def deflist(state: StateBlock, startLine: int, endLine: int, silent: bool):
119118
)
120119

121120
while pos < maximum:
122-
ch = charCodeAt(state.src, pos)
121+
ch = state.srcCharCode[pos]
123122

124123
if isSpace(ch):
125124
if ch == 0x09:

markdown_it/extensions/footnote/index.py

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from markdown_it.rules_inline import StateInline
77
from markdown_it.rules_block import StateBlock
88
from markdown_it.helpers import parseLinkLabel
9-
from markdown_it.common.utils import isSpace, charCodeAt
9+
from markdown_it.common.utils import isSpace
1010

1111

1212
def footnote_plugin(md: MarkdownIt):
@@ -43,23 +43,23 @@ def footnote_def(state: StateBlock, startLine: int, endLine: int, silent: bool):
4343
if start + 4 > maximum:
4444
return False
4545

46-
if charCodeAt(state.src, start) != 0x5B: # /* [ */
46+
if state.srcCharCode[start] != 0x5B: # /* [ */
4747
return False
48-
if charCodeAt(state.src, start + 1) != 0x5E: # /* ^ */
48+
if state.srcCharCode[start + 1] != 0x5E: # /* ^ */
4949
return False
5050

5151
pos = start + 2
5252
while pos < maximum:
53-
if charCodeAt(state.src, pos) == 0x20:
53+
if state.srcCharCode[pos] == 0x20:
5454
return False
55-
if charCodeAt(state.src, pos) == 0x5D: # /* ] */
55+
if state.srcCharCode[pos] == 0x5D: # /* ] */
5656
break
5757
pos += 1
5858

5959
if pos == start + 2: # no empty footnote labels
6060
return False
6161
pos += 1
62-
if pos + 1 >= maximum or charCodeAt(state.src, pos) != 0x3A: # /* : */
62+
if pos + 1 >= maximum or state.srcCharCode[pos] != 0x3A: # /* : */
6363
return False
6464
if silent:
6565
return True
@@ -87,7 +87,7 @@ def footnote_def(state: StateBlock, startLine: int, endLine: int, silent: bool):
8787
)
8888

8989
while pos < maximum:
90-
ch = charCodeAt(state.src, pos)
90+
ch = state.srcCharCode[pos]
9191

9292
if isSpace(ch):
9393
if ch == 0x09:
@@ -136,9 +136,9 @@ def footnote_inline(state: StateInline, silent: bool):
136136

137137
if start + 2 >= maximum:
138138
return False
139-
if charCodeAt(state.src, start) != 0x5E: # /* ^ */
139+
if state.srcCharCode[start] != 0x5E: # /* ^ */
140140
return False
141-
if charCodeAt(state.src, start + 1) != 0x5B: # /* [ */
141+
if state.srcCharCode[start + 1] != 0x5B: # /* [ */
142142
return False
143143

144144
labelStart = start + 2
@@ -182,18 +182,18 @@ def footnote_ref(state: StateInline, silent: bool):
182182

183183
if "footnotes" not in state.env or "refs" not in state.env["footnotes"]:
184184
return False
185-
if charCodeAt(state.src, start) != 0x5B: # /* [ */
185+
if state.srcCharCode[start] != 0x5B: # /* [ */
186186
return False
187-
if charCodeAt(state.src, start + 1) != 0x5E: # /* ^ */
187+
if state.srcCharCode[start + 1] != 0x5E: # /* ^ */
188188
return False
189189

190190
pos = start + 2
191191
while pos < maximum:
192-
if charCodeAt(state.src, pos) == 0x20:
192+
if state.srcCharCode[pos] == 0x20:
193193
return False
194-
if charCodeAt(state.src, pos) == 0x0A:
194+
if state.srcCharCode[pos] == 0x0A:
195195
return False
196-
if charCodeAt(state.src, pos) == 0x5D: # /* ] */
196+
if state.srcCharCode[pos] == 0x5D: # /* ] */
197197
break
198198
pos += 1
199199

markdown_it/extensions/front_matter/index.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def frontMatter(state: StateBlock, startLine: int, endLine: int, silent: bool):
2929

3030
# Check out the first character of the first line quickly,
3131
# this should filter out non-front matter
32-
if startLine != 0 or marker_char != charCodeAt(state.src, 0):
32+
if startLine != 0 or marker_char != state.srcCharCode[0]:
3333
return False
3434

3535
# Check out the rest of the marker string
@@ -73,7 +73,7 @@ def frontMatter(state: StateBlock, startLine: int, endLine: int, silent: bool):
7373
# test
7474
break
7575

76-
if marker_char != charCodeAt(state.src, start):
76+
if marker_char != state.srcCharCode[start]:
7777
continue
7878

7979
if state.sCount[nextLine] - state.blkIndent >= 4:

markdown_it/extensions/myst_blocks/index.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from markdown_it import MarkdownIt
44
from markdown_it.rules_block import StateBlock
5-
from markdown_it.common.utils import charCodeAt, isSpace, escapeHtml
5+
from markdown_it.common.utils import isSpace, escapeHtml
66

77

88
TARGET_PATTERN = re.compile(r"^\(([a-zA-Z0-9\|\@\<\>\*\.\/\_\-\+\:]{1,100})\)\=\s*$")
@@ -40,7 +40,7 @@ def line_comment(state: StateBlock, startLine: int, endLine: int, silent: bool):
4040
if state.sCount[startLine] - state.blkIndent >= 4:
4141
return False
4242

43-
marker = charCodeAt(state.src, pos)
43+
marker = state.srcCharCode[pos]
4444
pos += 1
4545

4646
# Check block marker /* % */
@@ -70,7 +70,7 @@ def block_break(state: StateBlock, startLine: int, endLine: int, silent: bool):
7070
if state.sCount[startLine] - state.blkIndent >= 4:
7171
return False
7272

73-
marker = charCodeAt(state.src, pos)
73+
marker = state.srcCharCode[pos]
7474
pos += 1
7575

7676
# Check block marker /* + */
@@ -81,7 +81,7 @@ def block_break(state: StateBlock, startLine: int, endLine: int, silent: bool):
8181

8282
cnt = 1
8383
while pos < maximum:
84-
ch = charCodeAt(state.src, pos)
84+
ch = state.srcCharCode[pos]
8585
if ch != marker and not isSpace(ch):
8686
break
8787
if ch == marker:

markdown_it/extensions/myst_role/index.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
from markdown_it import MarkdownIt
44
from markdown_it.rules_inline import StateInline
5-
from markdown_it.common.utils import charCodeAt, escapeHtml
5+
from markdown_it.common.utils import escapeHtml
66

77

88
PATTERN = re.compile(r"^\{([a-zA-Z0-9\_\-\+\:]{1,36})\}(`+)(?!`)(.+?)(?<!`)\2(?!`)")
@@ -15,7 +15,7 @@ def myst_role_plugin(md: MarkdownIt):
1515

1616
def myst_role(state: StateInline, silent: bool):
1717
try:
18-
if charCodeAt(state.src, state.pos - 1) == 0x5C: # /* \ */
18+
if state.srcCharCode[state.pos - 1] == 0x5C: # /* \ */
1919
# escaped (this could be improved in the case of edge case '\\{')
2020
return False
2121
except IndexError:

markdown_it/helpers/parse_link_label.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
returns the end of the label
66
77
"""
8-
from ..common.utils import charCodeAt
98

109

1110
def parseLinkLabel(state, start, disableNested=False):
@@ -18,7 +17,7 @@ def parseLinkLabel(state, start, disableNested=False):
1817
level = 1
1918

2019
while state.pos < state.posMax:
21-
marker = charCodeAt(state.src, state.pos)
20+
marker = state.srcCharCode[state.pos]
2221
if marker == 0x5D: # /* ] */)
2322
level -= 1
2423
if level == 0:

markdown_it/parser_block.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,10 +92,10 @@ def tokenize(
9292
line += 1
9393
state.line = line
9494

95-
def parse(self, src: str, md, env, outTokens: List[Token]):
95+
def parse(self, src: str, md, env, outTokens: List[Token], ords: List[int] = None):
9696
"""Process input string and push block tokens into `outTokens`."""
9797
if not src:
9898
return
99-
state = StateBlock(src, md, env, outTokens)
99+
state = StateBlock(src, md, env, outTokens, ords)
100100
self.tokenize(state, state.line, state.lineMax)
101101
return state.tokens

markdown_it/port.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,10 @@
1111
this is generally the main difference between the codes,
1212
because in python you can't do e.g. `for {i=1;i<x;i++} {}`
1313
- Use python version of `charCodeAt`
14+
- |
15+
Reduce use of charCodeAt() by storing char codes in a srcCharCode attribute for state
16+
objects and sharing those whenever possible.
17+
This provides a significant performance boost
1418
- |
1519
Use python's built-in `html.escape` and `urlparse.quote` methods, as a replacement for
1620
the JS dependencies [mdurl](https://www.npmjs.com/package/mdurl)

markdown_it/rules_block/blockquote.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
import logging
33

44
from .state_block import StateBlock
5-
from ..common.utils import isSpace, charCodeAt
5+
from ..common.utils import isSpace
66

77
LOGGER = logging.getLogger(__name__)
88

@@ -22,7 +22,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
2222
return False
2323

2424
# check the block quote marker
25-
if charCodeAt(state.src, pos) != 0x3E: # /* > */
25+
if state.srcCharCode[pos] != 0x3E: # /* > */
2626
pos += 1
2727
return False
2828
pos += 1
@@ -40,15 +40,15 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
4040
)
4141

4242
# skip one optional space after '>'
43-
if charCodeAt(state.src, pos) == 0x20: # /* space */
43+
if state.srcCharCode[pos] == 0x20: # /* space */
4444
# ' > test '
4545
# ^ -- position start of line here:
4646
pos += 1
4747
initial += 1
4848
offset += 1
4949
adjustTab = False
5050
spaceAfterMarker = True
51-
elif charCodeAt(state.src, pos) == 0x09: # /* tab */
51+
elif state.srcCharCode[pos] == 0x09: # /* tab */
5252
spaceAfterMarker = True
5353

5454
if (state.bsCount[startLine] + offset) % 4 == 3:
@@ -71,7 +71,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
7171
state.bMarks[startLine] = pos
7272

7373
while pos < max:
74-
ch = charCodeAt(state.src, pos)
74+
ch = state.srcCharCode[pos]
7575

7676
if isSpace(ch):
7777
if ch == 0x09: # / tab /
@@ -147,9 +147,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
147147
# Case 1: line is not inside the blockquote, and this line is empty.
148148
break
149149

150-
evaluatesTrue = (
151-
charCodeAt(state.src, pos) == 0x3E and not wasOutdented
152-
) # /* > */
150+
evaluatesTrue = state.srcCharCode[pos] == 0x3E and not wasOutdented # /* > */
153151
pos += 1
154152
if evaluatesTrue:
155153
# This line is inside the blockquote.
@@ -162,15 +160,15 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
162160
)
163161

164162
# skip one optional space after '>'
165-
if charCodeAt(state.src, pos) == 0x20: # /* space */
163+
if state.srcCharCode[pos] == 0x20: # /* space */
166164
# ' > test '
167165
# ^ -- position start of line here:
168166
pos += 1
169167
initial += 1
170168
offset += 1
171169
adjustTab = False
172170
spaceAfterMarker = True
173-
elif charCodeAt(state.src, pos) == 0x09: # /* tab */
171+
elif state.srcCharCode[pos] == 0x09: # /* tab */
174172
spaceAfterMarker = True
175173

176174
if (state.bsCount[nextLine] + offset) % 4 == 3:
@@ -193,7 +191,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
193191
state.bMarks[nextLine] = pos
194192

195193
while pos < max:
196-
ch = charCodeAt(state.src, pos)
194+
ch = state.srcCharCode[pos]
197195

198196
if isSpace(ch):
199197
if ch == 0x09:

markdown_it/rules_block/fence.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
# fences (``` lang, ~~~ lang)
22
import logging
33

4-
from ..common.utils import charCodeAt, stripEscape
4+
from ..common.utils import stripEscape
55
from .state_block import StateBlock
66

77
LOGGER = logging.getLogger(__name__)
@@ -22,7 +22,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
2222
if pos + 3 > maximum:
2323
return False
2424

25-
marker = charCodeAt(state.src, pos)
25+
marker = state.srcCharCode[pos]
2626

2727
# /* ~ */ /* ` */
2828
if marker != 0x7E and marker != 0x60:
@@ -68,7 +68,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
6868
# test
6969
break
7070

71-
if charCodeAt(state.src, pos) != marker:
71+
if state.srcCharCode[pos] != marker:
7272
continue
7373

7474
if state.sCount[nextLine] - state.blkIndent >= 4:

0 commit comments

Comments
 (0)