Skip to content

Commit 2416dbf

Browse files
committed
Significantly speed up file handling error paths (#17920)
This can have a huge overall impact on mypy performance when search paths are long.
1 parent b8429f4 commit 2416dbf

File tree

3 files changed

+40 -49 lines changed

mypy/build.py

+8 -9
Original file line number | Diff line number | Diff line change
@@ -736,8 +736,8 @@ def maybe_swap_for_shadow_path(self, path: str) -> str:
736736
shadow_file = self.shadow_equivalence_map.get(path)
737737
return shadow_file if shadow_file else path
738738

739-
def get_stat(self, path: str) -> os.stat_result:
740-
return self.fscache.stat(self.maybe_swap_for_shadow_path(path))
739+
def get_stat(self, path: str) -> os.stat_result | None:
740+
return self.fscache.stat_or_none(self.maybe_swap_for_shadow_path(path))
741741

742742
def getmtime(self, path: str) -> int:
743743
"""Return a file's mtime; but 0 in bazel mode.
@@ -1394,9 +1394,9 @@ def validate_meta(
13941394
if bazel:
13951395
# Normalize path under bazel to make sure it isn't absolute
13961396
path = normpath(path, manager.options)
1397-
try:
1398-
st = manager.get_stat(path)
1399-
except OSError:
1397+
1398+
st = manager.get_stat(path)
1399+
if st is None:
14001400
return None
14011401
if not stat.S_ISDIR(st.st_mode) and not stat.S_ISREG(st.st_mode):
14021402
manager.log(f"Metadata abandoned for {id}: file or directory {path} does not exist")
@@ -1572,10 +1572,9 @@ def write_cache(
15721572
plugin_data = manager.plugin.report_config_data(ReportConfigContext(id, path, is_check=False))
15731573

15741574
# Obtain and set up metadata
1575-
try:
1576-
st = manager.get_stat(path)
1577-
except OSError as err:
1578-
manager.log(f"Cannot get stat for {path}: {err}")
1575+
st = manager.get_stat(path)
1576+
if st is None:
1577+
manager.log(f"Cannot get stat for {path}")
15791578
# Remove apparently-invalid cache files.
15801579
# (This is purely an optimization.)
15811580
for filename in [data_json, meta_json]:

mypy/fscache.py

+26 -33
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,8 @@ def set_package_root(self, package_root: list[str]) -> None:
5151

5252
def flush(self) -> None:
5353
"""Start another transaction and empty all caches."""
54-
self.stat_cache: dict[str, os.stat_result] = {}
55-
self.stat_error_cache: dict[str, OSError] = {}
54+
self.stat_or_none_cache: dict[str, os.stat_result | None] = {}
55+
5656
self.listdir_cache: dict[str, list[str]] = {}
5757
self.listdir_error_cache: dict[str, OSError] = {}
5858
self.isfile_case_cache: dict[str, bool] = {}
@@ -62,24 +62,21 @@ def flush(self) -> None:
6262
self.hash_cache: dict[str, str] = {}
6363
self.fake_package_cache: set[str] = set()
6464

65-
def stat(self, path: str) -> os.stat_result:
66-
if path in self.stat_cache:
67-
return self.stat_cache[path]
68-
if path in self.stat_error_cache:
69-
raise copy_os_error(self.stat_error_cache[path])
65+
def stat_or_none(self, path: str) -> os.stat_result | None:
66+
if path in self.stat_or_none_cache:
67+
return self.stat_or_none_cache[path]
68+
69+
st = None
7070
try:
7171
st = os.stat(path)
72-
except OSError as err:
72+
except OSError:
7373
if self.init_under_package_root(path):
7474
try:
75-
return self._fake_init(path)
75+
st = self._fake_init(path)
7676
except OSError:
7777
pass
78-
# Take a copy to get rid of associated traceback and frame objects.
79-
# Just assigning to __traceback__ doesn't free them.
80-
self.stat_error_cache[path] = copy_os_error(err)
81-
raise err
82-
self.stat_cache[path] = st
78+
79+
self.stat_or_none_cache[path] = st
8380
return st
8481

8582
def init_under_package_root(self, path: str) -> bool:
@@ -112,9 +109,9 @@ def init_under_package_root(self, path: str) -> bool:
112109
if not os.path.basename(dirname).isidentifier():
113110
# Can't put an __init__.py in a place that's not an identifier
114111
return False
115-
try:
116-
st = self.stat(dirname)
117-
except OSError:
112+
113+
st = self.stat_or_none(dirname)
114+
if st is None:
118115
return False
119116
else:
120117
if not stat.S_ISDIR(st.st_mode):
@@ -145,15 +142,14 @@ def _fake_init(self, path: str) -> os.stat_result:
145142
assert basename == "__init__.py", path
146143
assert not os.path.exists(path), path # Not cached!
147144
dirname = os.path.normpath(dirname)
148-
st = self.stat(dirname) # May raise OSError
145+
st = os.stat(dirname) # May raise OSError
149146
# Get stat result as a list so we can modify it.
150147
seq: list[float] = list(st)
151148
seq[stat.ST_MODE] = stat.S_IFREG | 0o444
152149
seq[stat.ST_INO] = 1
153150
seq[stat.ST_NLINK] = 1
154151
seq[stat.ST_SIZE] = 0
155152
st = os.stat_result(seq)
156-
self.stat_cache[path] = st
157153
# Make listdir() and read() also pretend this file exists.
158154
self.fake_package_cache.add(dirname)
159155
return st
@@ -181,9 +177,8 @@ def listdir(self, path: str) -> list[str]:
181177
return results
182178

183179
def isfile(self, path: str) -> bool:
184-
try:
185-
st = self.stat(path)
186-
except OSError:
180+
st = self.stat_or_none(path)
181+
if st is None:
187182
return False
188183
return stat.S_ISREG(st.st_mode)
189184

@@ -248,18 +243,14 @@ def exists_case(self, path: str, prefix: str) -> bool:
248243
return res
249244

250245
def isdir(self, path: str) -> bool:
251-
try:
252-
st = self.stat(path)
253-
except OSError:
246+
st = self.stat_or_none(path)
247+
if st is None:
254248
return False
255249
return stat.S_ISDIR(st.st_mode)
256250

257251
def exists(self, path: str) -> bool:
258-
try:
259-
self.stat(path)
260-
except FileNotFoundError:
261-
return False
262-
return True
252+
st = self.stat_or_none(path)
253+
return st is not None
263254

264255
def read(self, path: str) -> bytes:
265256
if path in self.read_cache:
@@ -269,7 +260,7 @@ def read(self, path: str) -> bytes:
269260

270261
# Need to stat first so that the contents of file are from no
271262
# earlier instant than the mtime reported by self.stat().
272-
self.stat(path)
263+
self.stat_or_none(path)
273264

274265
dirname, basename = os.path.split(path)
275266
dirname = os.path.normpath(dirname)
@@ -294,8 +285,10 @@ def hash_digest(self, path: str) -> str:
294285
return self.hash_cache[path]
295286

296287
def samefile(self, f1: str, f2: str) -> bool:
297-
s1 = self.stat(f1)
298-
s2 = self.stat(f2)
288+
s1 = self.stat_or_none(f1)
289+
s2 = self.stat_or_none(f2)
290+
if s1 is None or s2 is None:
291+
return False
299292
return os.path.samestat(s1, s2)
300293

301294

mypy/fswatcher.py

+6 -7
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import os
56
from typing import AbstractSet, Iterable, NamedTuple
67

78
from mypy.fscache import FileSystemCache
@@ -56,18 +57,16 @@ def remove_watched_paths(self, paths: Iterable[str]) -> None:
5657
del self._file_data[path]
5758
self._paths -= set(paths)
5859

59-
def _update(self, path: str) -> None:
60-
st = self.fs.stat(path)
60+
def _update(self, path: str, st: os.stat_result) -> None:
6161
hash_digest = self.fs.hash_digest(path)
6262
self._file_data[path] = FileData(st.st_mtime, st.st_size, hash_digest)
6363

6464
def _find_changed(self, paths: Iterable[str]) -> AbstractSet[str]:
6565
changed = set()
6666
for path in paths:
6767
old = self._file_data[path]
68-
try:
69-
st = self.fs.stat(path)
70-
except FileNotFoundError:
68+
st = self.fs.stat_or_none(path)
69+
if st is None:
7170
if old is not None:
7271
# File was deleted.
7372
changed.add(path)
@@ -76,13 +75,13 @@ def _find_changed(self, paths: Iterable[str]) -> AbstractSet[str]:
7675
if old is None:
7776
# File is new.
7877
changed.add(path)
79-
self._update(path)
78+
self._update(path, st)
8079
# Round mtimes down, to match the mtimes we write to meta files
8180
elif st.st_size != old.st_size or int(st.st_mtime) != int(old.st_mtime):
8281
# Only look for changes if size or mtime has changed as an
8382
# optimization, since calculating hash is expensive.
8483
new_hash = self.fs.hash_digest(path)
85-
self._update(path)
84+
self._update(path, st)
8685
if st.st_size != old.st_size or new_hash != old.hash:
8786
# Changed file.
8887
changed.add(path)

0 commit comments

Comments
 (0)