Skip to content

Commit e20aaee

Browse files
committed
Let mypyc optimise os.path.join (#17949)
See #17948.

There's one call site which has varargs that I leave as os.path.join; it doesn't show up on my profile. I do see the `endswith` on the profile; we could try `path[-1] == '/'` instead (could save a few dozen milliseconds).

In my work environment, this is about a 10% speedup:

```
λ hyperfine -w 1 -M 3 '/tmp/mypy_primer/timer_mypy_6eddd3ab1/venv/bin/mypy -c "import torch" --no-incremental --python-executable /opt/oai/bin/python'
Benchmark 1: /tmp/mypy_primer/timer_mypy_6eddd3ab1/venv/bin/mypy -c "import torch" --no-incremental --python-executable /opt/oai/bin/python
  Time (mean ± σ): 30.842 s ± 0.119 s [User: 26.383 s, System: 4.396 s]
  Range (min … max): 30.706 s … 30.927 s 3 runs
```

Compared to:

```
λ hyperfine -w 1 -M 3 '/tmp/mypy_primer/timer_mypy_88ae62b4a/venv/bin/mypy -c "import torch" --no-incremental --python-executable /opt/oai/bin/python'
Benchmark 1: /tmp/mypy_primer/timer_mypy_88ae62b4a/venv/bin/mypy -c "import torch" --no-incremental --python-executable /opt/oai/bin/python
  Time (mean ± σ): 34.161 s ± 0.163 s [User: 29.818 s, System: 4.289 s]
  Range (min … max): 34.013 s … 34.336 s 3 runs
```

In the toy "long" environment mentioned in the issue, this is about a 7% speedup:

```
λ hyperfine -w 1 -M 3 '/tmp/mypy_primer/timer_mypy_6eddd3ab1/venv/bin/mypy -c "import torch" --no-incremental --python-executable long/bin/python'
Benchmark 1: /tmp/mypy_primer/timer_mypy_6eddd3ab1/venv/bin/mypy -c "import torch" --no-incremental --python-executable long/bin/python
  Time (mean ± σ): 23.177 s ± 0.317 s [User: 20.265 s, System: 2.873 s]
  Range (min … max): 22.815 s … 23.407 s 3 runs
```

Compared to:

```
λ hyperfine -w 1 -M 3 '/tmp/mypy_primer/timer_mypy_88ae62b4a/venv/bin/mypy -c "import torch" --python-executable=long/bin/python --no-incremental'
Benchmark 1: /tmp/mypy_primer/timer_mypy_88ae62b4a/venv/bin/mypy -c "import torch" --python-executable=long/bin/python --no-incremental
  Time (mean ± σ): 24.838 s ± 0.237 s [User: 22.038 s, System: 2.750 s]
  Range (min … max): 24.598 s … 25.073 s 3 runs
```

In the "clean" environment, this is a 1% speedup, but below the noise floor.
1 parent 159974c commit e20aaee

File tree

2 files changed

+33
-15
lines changed

2 files changed

+33
-15
lines changed

mypy/modulefinder.py

+16-15
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
from mypy.nodes import MypyFile
2323
from mypy.options import Options
2424
from mypy.stubinfo import approved_stub_package_exists
25+
from mypy.util import os_path_join
2526

2627

2728
# Paths to be searched in find_module().
@@ -205,7 +206,7 @@ def find_module_via_source_set(self, id: str) -> ModuleSearchResult | None:
205206
d = os.path.dirname(p)
206207
for _ in range(id.count(".")):
207208
if not any(
208-
self.fscache.isfile(os.path.join(d, "__init__" + x)) for x in PYTHON_EXTENSIONS
209+
self.fscache.isfile(os_path_join(d, "__init__" + x)) for x in PYTHON_EXTENSIONS
209210
):
210211
return None
211212
d = os.path.dirname(d)
@@ -249,7 +250,7 @@ def find_lib_path_dirs(self, id: str, lib_path: tuple[str, ...]) -> PackageDirs:
249250
dirs = []
250251
for pathitem in self.get_toplevel_possibilities(lib_path, components[0]):
251252
# e.g., '/usr/lib/python3.4/foo/bar'
252-
dir = os.path.normpath(os.path.join(pathitem, dir_chain))
253+
dir = os.path.normpath(os_path_join(pathitem, dir_chain))
253254
if self.fscache.isdir(dir):
254255
dirs.append((dir, True))
255256
return dirs
@@ -320,8 +321,8 @@ def _find_module_non_stub_helper(
320321
plausible_match = False
321322
dir_path = pkg_dir
322323
for index, component in enumerate(components):
323-
dir_path = os.path.join(dir_path, component)
324-
if self.fscache.isfile(os.path.join(dir_path, "py.typed")):
324+
dir_path = os_path_join(dir_path, component)
325+
if self.fscache.isfile(os_path_join(dir_path, "py.typed")):
325326
return os.path.join(pkg_dir, *components[:-1]), index == 0
326327
elif not plausible_match and (
327328
self.fscache.isdir(dir_path) or self.fscache.isfile(dir_path + ".py")
@@ -418,9 +419,9 @@ def _find_module(self, id: str, use_typeshed: bool) -> ModuleSearchResult:
418419
# Third-party stub/typed packages
419420
for pkg_dir in self.search_paths.package_path:
420421
stub_name = components[0] + "-stubs"
421-
stub_dir = os.path.join(pkg_dir, stub_name)
422+
stub_dir = os_path_join(pkg_dir, stub_name)
422423
if fscache.isdir(stub_dir):
423-
stub_typed_file = os.path.join(stub_dir, "py.typed")
424+
stub_typed_file = os_path_join(stub_dir, "py.typed")
424425
stub_components = [stub_name] + components[1:]
425426
path = os.path.join(pkg_dir, *stub_components[:-1])
426427
if fscache.isdir(path):
@@ -430,7 +431,7 @@ def _find_module(self, id: str, use_typeshed: bool) -> ModuleSearchResult:
430431
# Partial here means that mypy should look at the runtime
431432
# package if installed.
432433
if fscache.read(stub_typed_file).decode().strip() == "partial":
433-
runtime_path = os.path.join(pkg_dir, dir_chain)
434+
runtime_path = os_path_join(pkg_dir, dir_chain)
434435
third_party_inline_dirs.append((runtime_path, True))
435436
# if the package is partial, we don't verify the module, as
436437
# the partial stub package may not have a __init__.pyi
@@ -580,7 +581,7 @@ def find_modules_recursive(self, module: str) -> list[BuildSource]:
580581
# Skip certain names altogether
581582
if name in ("__pycache__", "site-packages", "node_modules") or name.startswith("."):
582583
continue
583-
subpath = os.path.join(package_path, name)
584+
subpath = os_path_join(package_path, name)
584585

585586
if self.options and matches_exclude(
586587
subpath, self.options.exclude, self.fscache, self.options.verbosity >= 2
@@ -590,8 +591,8 @@ def find_modules_recursive(self, module: str) -> list[BuildSource]:
590591
if self.fscache.isdir(subpath):
591592
# Only recurse into packages
592593
if (self.options and self.options.namespace_packages) or (
593-
self.fscache.isfile(os.path.join(subpath, "__init__.py"))
594-
or self.fscache.isfile(os.path.join(subpath, "__init__.pyi"))
594+
self.fscache.isfile(os_path_join(subpath, "__init__.py"))
595+
or self.fscache.isfile(os_path_join(subpath, "__init__.pyi"))
595596
):
596597
seen.add(name)
597598
sources.extend(self.find_modules_recursive(module + "." + name))
@@ -636,7 +637,7 @@ def verify_module(fscache: FileSystemCache, id: str, path: str, prefix: str) ->
636637
for i in range(id.count(".")):
637638
path = os.path.dirname(path)
638639
if not any(
639-
fscache.isfile_case(os.path.join(path, f"__init__{extension}"), prefix)
640+
fscache.isfile_case(os_path_join(path, f"__init__{extension}"), prefix)
640641
for extension in PYTHON_EXTENSIONS
641642
):
642643
return False
@@ -651,7 +652,7 @@ def highest_init_level(fscache: FileSystemCache, id: str, path: str, prefix: str
651652
for i in range(id.count(".")):
652653
path = os.path.dirname(path)
653654
if any(
654-
fscache.isfile_case(os.path.join(path, f"__init__{extension}"), prefix)
655+
fscache.isfile_case(os_path_join(path, f"__init__{extension}"), prefix)
655656
for extension in PYTHON_EXTENSIONS
656657
):
657658
level = i + 1
@@ -842,11 +843,11 @@ def load_stdlib_py_versions(custom_typeshed_dir: str | None) -> StdlibVersions:
842843
843844
None means there is no maximum version.
844845
"""
845-
typeshed_dir = custom_typeshed_dir or os.path.join(os.path.dirname(__file__), "typeshed")
846-
stdlib_dir = os.path.join(typeshed_dir, "stdlib")
846+
typeshed_dir = custom_typeshed_dir or os_path_join(os.path.dirname(__file__), "typeshed")
847+
stdlib_dir = os_path_join(typeshed_dir, "stdlib")
847848
result = {}
848849

849-
versions_path = os.path.join(stdlib_dir, "VERSIONS")
850+
versions_path = os_path_join(stdlib_dir, "VERSIONS")
850851
assert os.path.isfile(versions_path), (custom_typeshed_dir, versions_path, __file__)
851852
with open(versions_path) as f:
852853
for line in f:

mypy/util.py

+17
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,23 @@ def is_sub_path(path1: str, path2: str) -> bool:
417417
return pathlib.Path(path2) in pathlib.Path(path1).parents
418418

419419

420+
# NOTE(review): the platform test is kept as two explicit `sys.platform ==`
# comparisons, presumably so that mypy/mypyc can resolve the branch
# statically -- confirm before rewriting it as a membership test.
if sys.platform == "linux" or sys.platform == "darwin":

    def os_path_join(path: str, b: str) -> str:
        """Join exactly two ``str`` path components using ``/`` as separator.

        Args:
            path: The leading component; may be empty.
            b: The trailing component.

        Returns:
            ``b`` unchanged when it starts with ``/`` or when ``path`` is
            empty; otherwise ``path`` and ``b`` joined by a single ``/``
            (no extra separator is added if ``path`` already ends with one).
        """
        # Based off of os.path.join, but simplified to str-only, 2 args and mypyc can compile it.
        if b.startswith("/") or not path:
            return b
        elif path.endswith("/"):
            return path + b
        else:
            return path + "/" + b

else:

    # Parameter names deliberately match the variant above so that the
    # function has one signature regardless of platform.
    def os_path_join(path: str, b: str) -> str:
        """Join exactly two path components by delegating to ``os.path.join``.

        Args:
            path: The leading component.
            b: The trailing component.

        Returns:
            The result of ``os.path.join(path, b)``.
        """
        return os.path.join(path, b)
435+
436+
420437
def hard_exit(status: int = 0) -> None:
421438
"""Kill the current process without fully cleaning up.
422439

0 commit comments

Comments
 (0)