Skip to content

Implement JS-esque StateBase.srcCharCodeAt #190

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion markdown_it/helpers/parse_link_label.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ def parseLinkLabel(state: StateInline, start: int, disableNested: bool = False)
level = 1

while state.pos < state.posMax:
marker = state.srcCharCode[state.pos]
marker = state.srcCharCodeAt(state.pos)
if marker == 0x5D: # /* ] */)
level -= 1
if level == 0:
Expand Down
5 changes: 3 additions & 2 deletions markdown_it/port.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@
to manipulate `Token.attrs`, which have an identical signature to those upstream.
- Use python version of `charCodeAt`
- |
Reduce use of charCodeAt() by storing char codes in a srcCharCodes attribute for state
objects and sharing those whenever possible
Reduce use of charCodeAt() by storing char codes in an `_ords` attribute
(accessible via `srcCharCodeAt` method) for state objects and sharing
those whenever possible.
This provides a significant performance boost
- |
In markdown_it/rules_block/reference.py,
Expand Down
21 changes: 19 additions & 2 deletions markdown_it/ruler.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,14 +26,16 @@ class Ruler
TYPE_CHECKING,
Union,
)
import warnings

import attr

if TYPE_CHECKING:
from markdown_it import MarkdownIt


class StateBase:
srcCharCode: Tuple[int, ...]
_ords: Tuple[int, ...]

def __init__(self, src: str, md: "MarkdownIt", env: MutableMapping):
self.src = src
Expand All @@ -47,7 +49,22 @@ def src(self) -> str:
@src.setter
def src(self, value: str) -> None:
self._src = value
self.srcCharCode = tuple(ord(c) for c in self.src)
self._ords = tuple(ord(c) for c in self.src)

@property
def srcCharCode(self) -> Tuple[int, ...]:
    """Deprecated accessor for the cached character codes of ``src``.

    Emits a ``DeprecationWarning`` that points callers at
    ``StateBase.srcCharCodeAt`` and then returns the ``_ords`` tuple as-is.
    """
    warnings.warn(
        "`StateBase.srcCharCode` is deprecated. Use `StateBase.srcCharCodeAt`",
        DeprecationWarning,
        # stacklevel=2 attributes the warning to the code that read the
        # property rather than to this method.
        stacklevel=2,
    )
    return self._ords

def srcCharCodeAt(self, idx: int) -> Optional[int]:
    """Return the character code of ``src`` at position ``idx``.

    Mirrors JavaScript's ``String.prototype.charCodeAt``, with ``None``
    standing in for the ``NaN`` that JS returns for an out-of-range index.

    :param idx: zero-based position in the source string.
    :return: the code point at ``idx``, or ``None`` when ``idx`` is negative
        or at/after the end of the source.
    """
    # A negative index would silently wrap around to the end of the tuple;
    # JS `charCodeAt` treats it as out of range, so do the same here.
    if idx < 0:
        return None
    try:
        return self._ords[idx]
    except IndexError:
        return None


# The first positional arg is always a subtype of `StateBase`. Other
Expand Down
27 changes: 8 additions & 19 deletions markdown_it/rules_block/blockquote.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
# Block quotes
import logging
from typing import Optional

from .state_block import StateBlock
from ..common.utils import isSpace
Expand All @@ -23,7 +22,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
return False

# check the block quote marker
if state.srcCharCode[pos] != 0x3E: # /* > */
if state.srcCharCodeAt(pos) != 0x3E: # /* > */
return False
pos += 1

Expand All @@ -35,21 +34,16 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
# set offset past spaces and ">"
initial = offset = state.sCount[startLine] + 1

try:
second_char_code: Optional[int] = state.srcCharCode[pos]
except IndexError:
second_char_code = None

# skip one optional space after '>'
if second_char_code == 0x20: # /* space */
if state.srcCharCodeAt(pos) == 0x20: # /* space */
# ' > test '
# ^ -- position start of line here:
pos += 1
initial += 1
offset += 1
adjustTab = False
spaceAfterMarker = True
elif second_char_code == 0x09: # /* tab */
elif state.srcCharCodeAt(pos) == 0x09: # /* tab */
spaceAfterMarker = True

if (state.bsCount[startLine] + offset) % 4 == 3:
Expand All @@ -72,7 +66,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
state.bMarks[startLine] = pos

while pos < max:
ch = state.srcCharCode[pos]
ch = state.srcCharCodeAt(pos)

if isSpace(ch):
if ch == 0x09: # / tab /
Expand Down Expand Up @@ -146,29 +140,24 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
# Case 1: line is not inside the blockquote, and this line is empty.
break

evaluatesTrue = state.srcCharCode[pos] == 0x3E and not isOutdented # /* > */
evaluatesTrue = state.srcCharCodeAt(pos) == 0x3E and not isOutdented # /* > */
pos += 1
if evaluatesTrue:
# This line is inside the blockquote.

# set offset past spaces and ">"
initial = offset = state.sCount[nextLine] + 1

try:
next_char: Optional[int] = state.srcCharCode[pos]
except IndexError:
next_char = None

# skip one optional space after '>'
if next_char == 0x20: # /* space */
if state.srcCharCodeAt(pos) == 0x20: # /* space */
# ' > test '
# ^ -- position start of line here:
pos += 1
initial += 1
offset += 1
adjustTab = False
spaceAfterMarker = True
elif next_char == 0x09: # /* tab */
elif state.srcCharCodeAt(pos) == 0x09: # /* tab */
spaceAfterMarker = True

if (state.bsCount[nextLine] + offset) % 4 == 3:
Expand All @@ -191,7 +180,7 @@ def blockquote(state: StateBlock, startLine: int, endLine: int, silent: bool):
state.bMarks[nextLine] = pos

while pos < max:
ch = state.srcCharCode[pos]
ch = state.srcCharCodeAt(pos)

if isSpace(ch):
if ch == 0x09:
Expand Down
4 changes: 2 additions & 2 deletions markdown_it/rules_block/fence.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
if pos + 3 > maximum:
return False

marker = state.srcCharCode[pos]
marker = state.srcCharCodeAt(pos)

# /* ~ */ /* ` */
if marker != 0x7E and marker != 0x60:
Expand Down Expand Up @@ -67,7 +67,7 @@ def fence(state: StateBlock, startLine: int, endLine: int, silent: bool):
# test
break

if state.srcCharCode[pos] != marker:
if state.srcCharCodeAt(pos) != marker:
continue

if state.sCount[nextLine] - state.blkIndent >= 4:
Expand Down
15 changes: 4 additions & 11 deletions markdown_it/rules_block/heading.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
""" ATX heading (#, ##, ...) """
import logging
from typing import Optional

from .state_block import StateBlock
from ..common.utils import isSpace
Expand All @@ -19,7 +18,7 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool):
if state.sCount[startLine] - state.blkIndent >= 4:
return False

ch: Optional[int] = state.srcCharCode[pos]
ch = state.srcCharCodeAt(pos)

# /* # */
if ch != 0x23 or pos >= maximum:
Expand All @@ -28,18 +27,12 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool):
# count heading level
level = 1
pos += 1
try:
ch = state.srcCharCode[pos]
except IndexError:
ch = None
ch = state.srcCharCodeAt(pos)
# /* # */
while ch == 0x23 and pos < maximum and level <= 6:
level += 1
pos += 1
try:
ch = state.srcCharCode[pos]
except IndexError:
ch = None
ch = state.srcCharCodeAt(pos)

if level > 6 or (pos < maximum and not isSpace(ch)):
return False
Expand All @@ -51,7 +44,7 @@ def heading(state: StateBlock, startLine: int, endLine: int, silent: bool):

maximum = state.skipSpacesBack(maximum, pos)
tmp = state.skipCharsBack(maximum, 0x23, pos) # #
if tmp > pos and isSpace(state.srcCharCode[tmp - 1]):
if tmp > pos and isSpace(state.srcCharCodeAt(tmp - 1)):
maximum = tmp

state.line = startLine + 1
Expand Down
4 changes: 2 additions & 2 deletions markdown_it/rules_block/hr.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool):
if state.sCount[startLine] - state.blkIndent >= 4:
return False

marker = state.srcCharCode[pos]
marker = state.srcCharCodeAt(pos)
pos += 1

# Check hr marker: /* * */ /* - */ /* _ */
Expand All @@ -33,7 +33,7 @@ def hr(state: StateBlock, startLine: int, endLine: int, silent: bool):

cnt = 1
while pos < maximum:
ch = state.srcCharCode[pos]
ch = state.srcCharCodeAt(pos)
pos += 1
if ch != marker and not isSpace(ch):
return False
Expand Down
2 changes: 1 addition & 1 deletion markdown_it/rules_block/html_block.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def html_block(state: StateBlock, startLine: int, endLine: int, silent: bool):
if not state.md.options.get("html", None):
return False

if state.srcCharCode[pos] != 0x3C: # /* < */
if state.srcCharCodeAt(pos) != 0x3C: # /* < */
return False

lineText = state.src[pos:maximum]
Expand Down
3 changes: 2 additions & 1 deletion markdown_it/rules_block/lheading.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool):
maximum = state.eMarks[nextLine]

if pos < maximum:
marker = state.srcCharCode[pos]
marker = state.srcCharCodeAt(pos)

# /* - */ /* = */
if marker == 0x2D or marker == 0x3D:
Expand Down Expand Up @@ -74,6 +74,7 @@ def lheading(state: StateBlock, startLine: int, endLine: int, silent: bool):
state.line = nextLine + 1

token = state.push("heading_open", "h" + str(level), 1)
assert marker is not None
token.markup = chr(marker)
token.map = [startLine, state.line]

Expand Down
19 changes: 11 additions & 8 deletions markdown_it/rules_block/list.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@ def skipBulletListMarker(state: StateBlock, startLine: int):
pos = state.bMarks[startLine] + state.tShift[startLine]
maximum = state.eMarks[startLine]

marker = state.srcCharCode[pos]
marker = state.srcCharCodeAt(pos)
pos += 1
# Check bullet /* * */ /* - */ /* + */
if marker != 0x2A and marker != 0x2D and marker != 0x2B:
return -1

if pos < maximum:
ch = state.srcCharCode[pos]
ch = state.srcCharCodeAt(pos)

if not isSpace(ch):
# " -test " - is not a list item
Expand All @@ -42,7 +42,8 @@ def skipOrderedListMarker(state: StateBlock, startLine: int):
if pos + 1 >= maximum:
return -1

ch = state.srcCharCode[pos]
ch = state.srcCharCodeAt(pos)
assert ch is not None
pos += 1

# /* 0 */ /* 9 */
Expand All @@ -54,7 +55,8 @@ def skipOrderedListMarker(state: StateBlock, startLine: int):
if pos >= maximum:
return -1

ch = state.srcCharCode[pos]
ch = state.srcCharCodeAt(pos)
assert ch is not None
pos += 1

# /* 0 */ /* 9 */
Expand All @@ -74,7 +76,7 @@ def skipOrderedListMarker(state: StateBlock, startLine: int):
return -1

if pos < maximum:
ch = state.srcCharCode[pos]
ch = state.srcCharCodeAt(pos)

if not isSpace(ch):
# " 1.test " - is not a list item
Expand Down Expand Up @@ -156,7 +158,8 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool):
return False

# We should terminate list on style change. Remember first one to compare.
markerCharCode = state.srcCharCode[posAfterMarker - 1]
markerCharCode = state.srcCharCodeAt(posAfterMarker - 1)
assert markerCharCode is not None

# For validation mode we can terminate immediately
if silent:
Expand Down Expand Up @@ -198,7 +201,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool):
)

while pos < maximum:
ch = state.srcCharCode[pos]
ch = state.srcCharCodeAt(pos)

if ch == 0x09: # \t
offset += 4 - (offset + state.bsCount[nextLine]) % 4
Expand Down Expand Up @@ -318,7 +321,7 @@ def list_block(state: StateBlock, startLine: int, endLine: int, silent: bool):
if posAfterMarker < 0:
break

if markerCharCode != state.srcCharCode[posAfterMarker - 1]:
if markerCharCode != state.srcCharCodeAt(posAfterMarker - 1):
break

# Finalize list
Expand Down
6 changes: 3 additions & 3 deletions markdown_it/rules_block/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,17 @@ def reference(state: StateBlock, startLine, _endLine, silent):
if state.sCount[startLine] - state.blkIndent >= 4:
return False

if state.srcCharCode[pos] != 0x5B: # /* [ */
if state.srcCharCodeAt(pos) != 0x5B: # /* [ */
return False

# Simple check to quickly interrupt scan on [link](url) at the start of line.
# Can be useful in practice: https://github.com/markdown-it/markdown-it/issues/54
while pos < maximum:
# /* ] */ /* \ */ /* : */
if state.srcCharCode[pos] == 0x5D and state.srcCharCode[pos - 1] != 0x5C:
if state.srcCharCodeAt(pos) == 0x5D and state.srcCharCodeAt(pos - 1) != 0x5C:
if pos + 1 == maximum:
return False
if state.srcCharCode[pos + 1] != 0x3A:
if state.srcCharCodeAt(pos + 1) != 0x3A:
return False
break
pos += 1
Expand Down
Loading