Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit f4bef4b

Browse files
committed Apr 11, 2025
Use bytes methods for better performance
1 parent 56d40fa commit f4bef4b

File tree

1 file changed

+11
-11
lines changed

1 file changed

+11
-11
lines changed
 

‎build_docs.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -1278,25 +1278,25 @@ def proofread_canonicals(
12781278
purge(http, *paths_to_purge)
12791279

12801280

1281-
_canonical_re = re.compile(
1282-
"""<link rel="canonical" href="https://docs.python.org/([^"]*)" />"""
1283-
)
1284-
1285-
12861281
def _check_canonical_rel(file: Path, www_root: Path):
12871282
# Check for a canonical relation link in the HTML.
12881283
# If one exists, ensure that the target exists
12891284
# or otherwise remove the canonical link element.
1290-
html = file.read_text(encoding="UTF-8", errors="surrogateescape")
1291-
canonical = _canonical_re.search(html)
1292-
if canonical is None:
1285+
prefix = b'<link rel="canonical" href="https://docs.python.org/'
1286+
suffix = b'" />'
1287+
pfx_len = len(prefix)
1288+
sfx_len = len(suffix)
1289+
html = file.read_bytes()
1290+
try:
1291+
start = html.index(prefix)
1292+
end = html.index(suffix, start + pfx_len)
1293+
except ValueError:
12931294
return None
1294-
target = canonical.group(1)
1295+
target = html[start + pfx_len : end].decode(errors="surrogateescape")
12951296
if (www_root / target).exists():
12961297
return None
12971298
logging.info("Removing broken canonical from %s to %s", file, target)
1298-
html = html.replace(canonical.group(0), "")
1299-
file.write_text(html, encoding="UTF-8", errors="surrogateescape")
1299+
file.write_bytes(html[:start] + html[end + sfx_len :])
13001300
return file
13011301

13021302

0 commit comments

Comments
 (0)
Please sign in to comment.