23
23
from typing_extensions import Final , TypeAlias as _TypeAlias
24
24
25
25
from mypy .fscache import FileSystemCache
26
+ from mypy .nodes import MypyFile
26
27
from mypy .options import Options
27
28
from mypy .stubinfo import is_legacy_bundled_package
28
29
from mypy import pyinfo
@@ -126,6 +127,33 @@ def __repr__(self) -> str:
126
127
self .base_dir )
127
128
128
129
130
class BuildSourceSet:
    """Index over the build sources that answers "is this file a source?" quickly.

    The constructor walks the sources once and records:
      * source_text_present: True if any source has a non-None ``text``
      * source_modules: module name -> source path ('' when the path is falsy)
      * source_paths: the set of all truthy source paths
    """

    def __init__(self, sources: List[BuildSource]) -> None:
        """Build the lookup tables from ``sources``."""
        self.source_text_present = False
        self.source_modules = {}  # type: Dict[str, str]
        self.source_paths = set()  # type: Set[str]

        for src in sources:
            if src.text is not None:
                self.source_text_present = True
            path = src.path
            if path:
                self.source_paths.add(path)
            module = src.module
            if module:
                # Sources without a path are still recorded, with an empty path.
                self.source_modules[module] = path or ''

    def is_source(self, file: MypyFile) -> bool:
        """Return True if ``file`` counts as one of the build sources.

        A file matches when its path is a known source path, or when its full
        module name is a known source module.  If any source was supplied as
        text, every file is treated as a source.
        """
        return bool(
            (file.path and file.path in self.source_paths)
            or file._fullname in self.source_modules
            or self.source_text_present
        )
155
+
156
+
129
157
class FindModuleCache :
130
158
"""Module finder with integrated cache.
131
159
@@ -141,8 +169,10 @@ def __init__(self,
141
169
search_paths : SearchPaths ,
142
170
fscache : Optional [FileSystemCache ],
143
171
options : Optional [Options ],
144
- stdlib_py_versions : Optional [StdlibVersions ] = None ) -> None :
172
+ stdlib_py_versions : Optional [StdlibVersions ] = None ,
173
+ source_set : Optional [BuildSourceSet ] = None ) -> None :
145
174
self .search_paths = search_paths
175
+ self .source_set = source_set
146
176
self .fscache = fscache or FileSystemCache ()
147
177
# Cache for get_toplevel_possibilities:
148
178
# search_paths -> (toplevel_id -> list(package_dirs))
@@ -164,6 +194,53 @@ def clear(self) -> None:
164
194
self .initial_components .clear ()
165
195
self .ns_ancestors .clear ()
166
196
197
def find_module_via_source_set(self, id: str) -> Optional[ModuleSearchResult]:
    """Try to resolve a module directly from the input sources.

    This is only used when --fast-module-lookup is passed on the command line.

    Returns:
      * the source path, when 'id' is a source module whose file exists and
        every directory level checked below contains an __init__ file;
      * ModuleNotFoundReason.NOT_FOUND, when the parent of 'id' resolves to a
        plain module (not a package), so 'id' cannot be a submodule of it;
      * None, when the source set cannot answer and the caller should fall
        back to the regular search.
    """
    source_set = self.source_set
    if not source_set:
        return None

    path = source_set.source_modules.get(id)
    if path and self.fscache.isfile(path):
        # Walk up one directory per dot in the module name and require an
        # __init__ file at each level.  Without this the fast path could
        # report modules the slow path would not (e.g. after an __init__
        # file has been deleted), which is covered by some tests.
        # TODO: is there some combination of flags under which this check
        #       should be skipped?
        directory = os.path.dirname(path)
        for _ in range(id.count('.')):
            has_init = any(
                self.fscache.isfile(os.path.join(directory, '__init__' + x))
                for x in PYTHON_EXTENSIONS
            )
            if not has_init:
                return None
            directory = os.path.dirname(directory)
        return path

    parent_id, dot, _ = id.rpartition('.')
    if not dot:
        # A top-level name that is not in the source set: nothing to add.
        return None

    # Looking for foo.bar.baz without a matching source module: see whether
    # foo.bar can be resolved through the source set instead.
    parent = self.find_module_via_source_set(parent_id)
    if not isinstance(parent, str):
        return None

    stem, suffix = os.path.splitext(parent)
    is_package_init = any(parent.endswith('__init__' + x) for x in PYTHON_EXTENSIONS)
    if is_package_init or suffix not in PYTHON_EXTENSIONS or self.fscache.isdir(stem):
        # The parent is (or may be) a package, so the fast path has no verdict.
        return None

    # The parent is a plain *module*: it is not an __init__ file and there is
    # no directory of the same name next to it.  'baz' is then either a
    # top-level name inside foo.bar or does not exist at all.
    #
    # Searching the other search paths for another 'foo.bar.baz' module is
    # avoided because:
    #  1. in the unlikely event that one were found, it would most likely be
    #     unrelated to the source being typechecked and therefore more likely
    #     to lead to erroneous results
    #  2. as described in _find_module, in some cases the search itself could
    #     waste significant amounts of time
    return ModuleNotFoundReason.NOT_FOUND
243
+
167
244
def find_lib_path_dirs (self , id : str , lib_path : Tuple [str , ...]) -> PackageDirs :
168
245
"""Find which elements of a lib_path have the directory a module needs to exist.
169
246
@@ -229,7 +306,7 @@ def find_module(self, id: str, *, fast_path: bool = False) -> ModuleSearchResult
229
306
elif top_level in self .stdlib_py_versions :
230
307
use_typeshed = self ._typeshed_has_version (top_level )
231
308
self .results [id ] = self ._find_module (id , use_typeshed )
232
- if (not fast_path
309
+ if (not ( fast_path or ( self . options is not None and self . options . fast_module_lookup ))
233
310
and self .results [id ] is ModuleNotFoundReason .NOT_FOUND
234
311
and self ._can_find_module_in_parent_dir (id )):
235
312
self .results [id ] = ModuleNotFoundReason .WRONG_WORKING_DIRECTORY
@@ -295,6 +372,39 @@ def _can_find_module_in_parent_dir(self, id: str) -> bool:
295
372
def _find_module (self , id : str , use_typeshed : bool ) -> ModuleSearchResult :
296
373
fscache = self .fscache
297
374
375
+ # Fast path for any modules in the current source set.
376
+ # This is particularly important when there are a large number of search
377
+ # paths which share the first (few) component(s) due to the use of namespace
378
+ # packages, for instance:
379
+ # foo/
380
+ # company/
381
+ # __init__.py
382
+ # foo/
383
+ # bar/
384
+ # company/
385
+ # __init__.py
386
+ # bar/
387
+ # baz/
388
+ # company/
389
+ # __init__.py
390
+ # baz/
391
+ #
392
+ # mypy gets [foo/company/foo, bar/company/bar, baz/company/baz, ...] as input
393
+ # and computes [foo, bar, baz, ...] as the module search path.
394
+ #
395
+ # This would result in O(n) search for every import of company.*, leading to
396
+ # O(n**2) behavior in load_graph as such imports are unsurprisingly present
397
+ # at least once, and usually many more times than that, in each and every file
398
+ # being parsed.
399
+ #
400
+ # Thankfully, such cases are efficiently handled by looking up the module path
401
+ # via BuildSourceSet.
402
+ p = (self .find_module_via_source_set (id )
403
+ if (self .options is not None and self .options .fast_module_lookup )
404
+ else None )
405
+ if p :
406
+ return p
407
+
298
408
# If we're looking for a module like 'foo.bar.baz', it's likely that most of the
299
409
# many elements of lib_path don't even have a subdirectory 'foo/bar'. Discover
300
410
# that only once and cache it for when we look for modules like 'foo.bar.blah'
0 commit comments