3
3
4
4
"""File wrangling."""
5
5
6
- import fnmatch
7
6
import hashlib
8
7
import ntpath
9
8
import os
@@ -172,7 +171,7 @@ def isabs_anywhere(filename):
172
171
173
172
174
173
def prep_patterns (patterns ):
175
- """Prepare the file patterns for use in a `FnmatchMatcher `.
174
+ """Prepare the file patterns for use in a `GlobMatcher `.
176
175
177
176
If a pattern starts with a wildcard, it is used as a pattern
178
177
as-is. If it does not start with a wildcard, then it is made
@@ -253,15 +252,15 @@ def match(self, module_name):
253
252
return False
254
253
255
254
256
- class FnmatchMatcher :
255
+ class GlobMatcher :
257
256
"""A matcher for files by file name pattern."""
258
257
def __init__(self, pats, name="unknown"):
    """Store the glob patterns and compile them into one matching regex.

    `pats` is an iterable of glob pattern strings; it is copied into a
    list so the matcher keeps its own snapshot.  `name` is a label used
    when displaying the matcher.

    """
    self.name = name
    self.pats = [*pats]
    # Case sensitivity follows the platform flag: insensitive on Windows.
    self.re = globs_to_regex(self.pats, case_insensitive=env.WINDOWS)
262
261
263
262
def __repr__ (self ):
264
- return f"<FnmatchMatcher { self .name } { self .pats !r} >"
263
+ return f"<GlobMatcher { self .name } { self .pats !r} >"
265
264
266
265
def info (self ):
267
266
"""A list of strings for displaying when dumping state."""
@@ -282,37 +281,69 @@ def sep(s):
282
281
return the_sep
283
282
284
283
285
- def fnmatches_to_regex (patterns , case_insensitive = False , partial = False ):
286
- """Convert fnmatch patterns to a compiled regex that matches any of them.
284
# Tokenizer for _glob_to_regex.
# Each entry is (compiled regex, substitution template).  The entries are tried
# in order at the current position and the first one that matches wins, so the
# "disallowed" checks must come before the tokens they would otherwise shadow.
# The templates are expanded with Match.expand, which is why a literal
# backslash in the output regex is written with four backslashes here.
# None as a sub means disallowed.
G2RX_TOKENS = [(re.compile(rx), sub) for rx, sub in [
    (r"\*\*\*+", None),             # Can't have ***
    (r"[^/]+\*\*+", None),          # Can't have x**
    (r"\*\*+[^/]+", None),          # Can't have **x
    (r"\*\*/\*\*", None),           # Can't have **/**
    (r"^\*+/", r"(.*[/\\\\])?"),    # ^*/ matches any prefix-slash, or nothing.
    (r"/\*+$", r"[/\\\\].*"),       # /*$ matches any slash-suffix.
    (r"\*\*/", r"(.*[/\\\\])?"),    # **/ matches any subdirs, including none
    (r"/", r"[/\\\\]"),             # / matches either slash or backslash
    (r"\*", r"[^/\\\\]*"),          # * matches any number of non slash-likes
    (r"\?", r"[^/\\\\]"),           # ? matches one non slash-like
    (r"\[.*?\]", r"\g<0>"),         # [a-f] matches [a-f]
    (r"[a-zA-Z0-9_-]+", r"\g<0>"),  # word chars match themselves
    (r"[\[\]+{}]", None),           # Can't have regex special chars
    (r".", r"\\\g<0>"),             # Anything else is escaped to be safe
]]


def _glob_to_regex(pattern):
    """Convert a file-path glob pattern into a regex.

    Backslashes in `pattern` are treated as slashes.  A pattern with no
    slash at all is prefixed with "**/" so that it matches the name in any
    directory.  The pattern is then consumed left to right using
    `G2RX_TOKENS`, and the regex fragments are concatenated.

    Returns the regex as a string (not compiled, not anchored at the end).

    Raises ConfigError if the pattern contains a construct that the token
    table marks as disallowed.

    """
    # Turn all backslashes into slashes to simplify the tokenizer.
    pattern = pattern.replace("\\", "/")
    if "/" not in pattern:
        pattern = "**/" + pattern
    path_rx = []
    pos = 0
    while pos < len(pattern):
        for rx, sub in G2RX_TOKENS:
            m = rx.match(pattern, pos=pos)
            if m:
                if sub is None:
                    raise ConfigError(f"File pattern can't include {m[0]!r}")
                path_rx.append(m.expand(sub))
                pos = m.end()
                break
        else:
            # No token matched.  The catch-all "." token does not match a
            # newline, so without this fallback `pos` would never advance
            # and the loop would never finish.  Escape the single character
            # and move on.
            path_rx.append(re.escape(pattern[pos]))
            pos += 1
    return "".join(path_rx)
321
+
322
+
323
def globs_to_regex(patterns, case_insensitive=False, partial=False):
    """Build one compiled regex that matches any of the glob `patterns`.

    A slash in a pattern matches either a slash or a backslash, so that
    Windows paths are supported even when running on another platform.

    A pattern containing neither a slash nor a backslash is taken as a
    file name that can appear anywhere in the tree.  Any other pattern has
    to match the whole file path.

    When `partial` is true, a target matches if it starts with the
    pattern.  When it is false, the entire target string must match.

    When `case_insensitive` is true, the regex ignores case.

    Returns: a compiled regex object.  Use its .match method to test target
    strings.

    """
    re_flags = re.IGNORECASE if case_insensitive else 0
    combined = join_regex(_glob_to_regex(pat) for pat in patterns)
    if not partial:
        # Anchor at the end so the whole string has to be consumed.
        combined = rf"(?:{combined})\Z"
    return re.compile(combined, flags=re_flags)
317
348
318
349
@@ -342,7 +373,7 @@ def pprint(self):
342
373
def add (self , pattern , result ):
343
374
"""Add the `pattern`/`result` pair to the list of aliases.
344
375
345
- `pattern` is an `fnmatch `-style pattern. `result` is a simple
376
+ `pattern` is a `glob`-style pattern. `result` is a simple
346
377
string. When mapping paths, if a path starts with a match against
347
378
`pattern`, then that match is replaced with `result`. This models
348
379
isomorphic source trees being rooted at different places on two
@@ -370,7 +401,7 @@ def add(self, pattern, result):
370
401
pattern += pattern_sep
371
402
372
403
# Make a regex from the pattern.
373
- regex = fnmatches_to_regex ([pattern ], case_insensitive = True , partial = True )
404
+ regex = globs_to_regex ([pattern ], case_insensitive = True , partial = True )
374
405
375
406
# Normalize the result: it must end with a path separator.
376
407
result_sep = sep (result )
0 commit comments