Skip to content

Commit ec6205a

Browse files
committed
fix: use glob matching instead of fnmatch. #1407
I didn't understand that fnmatch considers the entire string to be a filename, even if it has slashes in it. This led to incorrect matching. Now we use our own implementation of glob matching to get the correct behavior.
1 parent b3a1d97 commit ec6205a

File tree

10 files changed

+284
-117
lines changed

10 files changed

+284
-117
lines changed

CHANGES.rst

+5
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ development at the same time, such as 4.5.x and 5.0.
2020
Unreleased
2121
----------
2222

23+
- Fixes to file pattern matching, fixing `issue 1407`_. Previously, `*` would
24+
incorrectly match directory separators, making precise matching difficult.
25+
This is now fixed.
26+
2327
- Improvements to combining data files when using the
2428
:ref:`config_run_relative_files` setting:
2529

@@ -39,6 +43,7 @@ Unreleased
3943
implementations other than CPython or PyPy (`issue 1474`_).
4044

4145
.. _issue 991: https://github.com/nedbat/coveragepy/issues/991
46+
.. _issue 1407: https://github.com/nedbat/coveragepy/issues/1407
4247
.. _issue 1474: https://github.com/nedbat/coveragepy/issues/1474
4348
.. _issue 1481: https://github.com/nedbat/coveragepy/issues/1481
4449

coverage/files.py

+55-24
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33

44
"""File wrangling."""
55

6-
import fnmatch
76
import hashlib
87
import ntpath
98
import os
@@ -172,7 +171,7 @@ def isabs_anywhere(filename):
172171

173172

174173
def prep_patterns(patterns):
175-
"""Prepare the file patterns for use in a `FnmatchMatcher`.
174+
"""Prepare the file patterns for use in a `GlobMatcher`.
176175
177176
If a pattern starts with a wildcard, it is used as a pattern
178177
as-is. If it does not start with a wildcard, then it is made
@@ -253,15 +252,15 @@ def match(self, module_name):
253252
return False
254253

255254

256-
class FnmatchMatcher:
255+
class GlobMatcher:
257256
"""A matcher for files by file name pattern."""
258257
def __init__(self, pats, name="unknown"):
259258
self.pats = list(pats)
260-
self.re = fnmatches_to_regex(self.pats, case_insensitive=env.WINDOWS)
259+
self.re = globs_to_regex(self.pats, case_insensitive=env.WINDOWS)
261260
self.name = name
262261

263262
def __repr__(self):
264-
return f"<FnmatchMatcher {self.name} {self.pats!r}>"
263+
return f"<GlobMatcher {self.name} {self.pats!r}>"
265264

266265
def info(self):
267266
"""A list of strings for displaying when dumping state."""
@@ -282,37 +281,69 @@ def sep(s):
282281
return the_sep
283282

284283

285-
def fnmatches_to_regex(patterns, case_insensitive=False, partial=False):
286-
"""Convert fnmatch patterns to a compiled regex that matches any of them.
284+
# Tokenizer for _glob_to_regex.
285+
# None as a sub means disallowed.
286+
G2RX_TOKENS = [(re.compile(rx), sub) for rx, sub in [
287+
(r"\*\*\*+", None), # Can't have ***
288+
(r"[^/]+\*\*+", None), # Can't have x**
289+
(r"\*\*+[^/]+", None), # Can't have **x
290+
(r"\*\*/\*\*", None), # Can't have **/**
291+
(r"^\*+/", r"(.*[/\\\\])?"), # ^*/ matches any prefix-slash, or nothing.
292+
(r"/\*+$", r"[/\\\\].*"), # /*$ matches any slash-suffix.
293+
(r"\*\*/", r"(.*[/\\\\])?"), # **/ matches any subdirs, including none
294+
(r"/", r"[/\\\\]"), # / matches either slash or backslash
295+
(r"\*", r"[^/\\\\]*"), # * matches any number of non slash-likes
296+
(r"\?", r"[^/\\\\]"), # ? matches one non slash-like
297+
(r"\[.*?\]", r"\g<0>"), # [a-f] matches [a-f]
298+
(r"[a-zA-Z0-9_-]+", r"\g<0>"), # word chars match themselves
299+
(r"[\[\]+{}]", None), # Can't have regex special chars
300+
(r".", r"\\\g<0>"), # Anything else is escaped to be safe
301+
]]
302+
303+
def _glob_to_regex(pattern):
304+
"""Convert a file-path glob pattern into a regex."""
305+
# Turn all backslashes into slashes to simplify the tokenizer.
306+
pattern = pattern.replace("\\", "/")
307+
if "/" not in pattern:
308+
pattern = "**/" + pattern
309+
path_rx = []
310+
pos = 0
311+
while pos < len(pattern):
312+
for rx, sub in G2RX_TOKENS:
313+
m = rx.match(pattern, pos=pos)
314+
if m:
315+
if sub is None:
316+
raise ConfigError(f"File pattern can't include {m[0]!r}")
317+
path_rx.append(m.expand(sub))
318+
pos = m.end()
319+
break
320+
return "".join(path_rx)
321+
322+
323+
def globs_to_regex(patterns, case_insensitive=False, partial=False):
324+
"""Convert glob patterns to a compiled regex that matches any of them.
287325
288326
Slashes are always converted to match either slash or backslash, for
289327
Windows support, even when running elsewhere.
290328
329+
If the pattern has no slash or backslash, then it is interpreted as
330+
matching a file name anywhere it appears in the tree. Otherwise, the glob
331+
pattern must match the whole file path.
332+
291333
If `partial` is true, then the pattern will match if the target string
292334
starts with the pattern. Otherwise, it must match the entire string.
293335
294336
Returns: a compiled regex object. Use the .match method to compare target
295337
strings.
296338
297339
"""
298-
regexes = (fnmatch.translate(pattern) for pattern in patterns)
299-
# */ at the start should also match nothing.
300-
regexes = (re.sub(r"^\(\?s:\.\*(\\\\|/)", r"(?s:^(.*\1)?", regex) for regex in regexes)
301-
# Be agnostic: / can mean backslash or slash.
302-
regexes = (re.sub(r"/", r"[\\\\/]", regex) for regex in regexes)
303-
304-
if partial:
305-
# fnmatch always adds a \Z to match the whole string, which we don't
306-
# want, so we remove the \Z. While removing it, we only replace \Z if
307-
# followed by paren (introducing flags), or at end, to keep from
308-
# destroying a literal \Z in the pattern.
309-
regexes = (re.sub(r'\\Z(\(\?|$)', r'\1', regex) for regex in regexes)
310-
311340
flags = 0
312341
if case_insensitive:
313342
flags |= re.IGNORECASE
314-
compiled = re.compile(join_regex(regexes), flags=flags)
315-
343+
rx = join_regex(map(_glob_to_regex, patterns))
344+
if not partial:
345+
rx = rf"(?:{rx})\Z"
346+
compiled = re.compile(rx, flags=flags)
316347
return compiled
317348

318349

@@ -342,7 +373,7 @@ def pprint(self):
342373
def add(self, pattern, result):
343374
"""Add the `pattern`/`result` pair to the list of aliases.
344375
345-
`pattern` is an `fnmatch`-style pattern. `result` is a simple
376+
`pattern` is a `glob`-style pattern.  `result` is a simple
346377
string. When mapping paths, if a path starts with a match against
347378
`pattern`, then that match is replaced with `result`. This models
348379
isomorphic source trees being rooted at different places on two
@@ -370,7 +401,7 @@ def add(self, pattern, result):
370401
pattern += pattern_sep
371402

372403
# Make a regex from the pattern.
373-
regex = fnmatches_to_regex([pattern], case_insensitive=True, partial=True)
404+
regex = globs_to_regex([pattern], case_insensitive=True, partial=True)
374405

375406
# Normalize the result: it must end with a path separator.
376407
result_sep = sep(result)

coverage/inorout.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from coverage import env
1717
from coverage.disposition import FileDisposition, disposition_init
1818
from coverage.exceptions import CoverageException, PluginError
19-
from coverage.files import TreeMatcher, FnmatchMatcher, ModuleMatcher
19+
from coverage.files import TreeMatcher, GlobMatcher, ModuleMatcher
2020
from coverage.files import prep_patterns, find_python_files, canonical_filename
2121
from coverage.misc import sys_modules_saved
2222
from coverage.python import source_for_file, source_for_morf
@@ -260,10 +260,10 @@ def debug(msg):
260260
self.pylib_match = TreeMatcher(self.pylib_paths, "pylib")
261261
debug(f"Python stdlib matching: {self.pylib_match!r}")
262262
if self.include:
263-
self.include_match = FnmatchMatcher(self.include, "include")
263+
self.include_match = GlobMatcher(self.include, "include")
264264
debug(f"Include matching: {self.include_match!r}")
265265
if self.omit:
266-
self.omit_match = FnmatchMatcher(self.omit, "omit")
266+
self.omit_match = GlobMatcher(self.omit, "omit")
267267
debug(f"Omit matching: {self.omit_match!r}")
268268

269269
self.cover_match = TreeMatcher(self.cover_paths, "coverage")

coverage/report.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
import sys
77

88
from coverage.exceptions import CoverageException, NoDataError, NotPython
9-
from coverage.files import prep_patterns, FnmatchMatcher
9+
from coverage.files import prep_patterns, GlobMatcher
1010
from coverage.misc import ensure_dir_for_file, file_be_gone
1111

1212

@@ -57,11 +57,11 @@ def get_analysis_to_report(coverage, morfs):
5757
config = coverage.config
5858

5959
if config.report_include:
60-
matcher = FnmatchMatcher(prep_patterns(config.report_include), "report_include")
60+
matcher = GlobMatcher(prep_patterns(config.report_include), "report_include")
6161
file_reporters = [fr for fr in file_reporters if matcher.match(fr.filename)]
6262

6363
if config.report_omit:
64-
matcher = FnmatchMatcher(prep_patterns(config.report_omit), "report_omit")
64+
matcher = GlobMatcher(prep_patterns(config.report_omit), "report_omit")
6565
file_reporters = [fr for fr in file_reporters if not matcher.match(fr.filename)]
6666

6767
if not file_reporters:

doc/cmd.rst

+23-17
Original file line numberDiff line numberDiff line change
@@ -342,7 +342,7 @@ single directory, and use the **combine** command to combine them into one
342342

343343
$ coverage combine
344344

345-
You can also name directories or files on the command line::
345+
You can also name directories or files to be combined on the command line::
346346

347347
$ coverage combine data1.dat windows_data_files/
348348

@@ -364,22 +364,6 @@ An existing combined data file is ignored and re-written. If you want to use
364364
runs, use the ``--append`` switch on the **combine** command. This behavior
365365
was the default before version 4.2.
366366

367-
To combine data for a source file, coverage has to find its data in each of the
368-
data files. Different test runs may run the same source file from different
369-
locations. For example, different operating systems will use different paths
370-
for the same file, or perhaps each Python version is run from a different
371-
subdirectory. Coverage needs to know that different file paths are actually
372-
the same source file for reporting purposes.
373-
374-
You can tell coverage.py how different source locations relate with a
375-
``[paths]`` section in your configuration file (see :ref:`config_paths`).
376-
It might be more convenient to use the ``[run] relative_files``
377-
setting to store relative file paths (see :ref:`relative_files
378-
<config_run_relative_files>`).
379-
380-
If data isn't combining properly, you can see details about the inner workings
381-
with ``--debug=pathmap``.
382-
383367
If any of the data files can't be read, coverage.py will print a warning
384368
indicating the file and the problem.
385369

@@ -414,6 +398,28 @@ want to keep those files, use the ``--keep`` command-line option.
414398
.. [[[end]]] (checksum: 0bdd83f647ee76363c955bedd9ddf749)
415399
416400
401+
.. _cmd_combine_remapping:
402+
403+
Re-mapping paths
404+
................
405+
406+
To combine data for a source file, coverage has to find its data in each of the
407+
data files. Different test runs may run the same source file from different
408+
locations. For example, different operating systems will use different paths
409+
for the same file, or perhaps each Python version is run from a different
410+
subdirectory. Coverage needs to know that different file paths are actually
411+
the same source file for reporting purposes.
412+
413+
You can tell coverage.py how different source locations relate with a
414+
``[paths]`` section in your configuration file (see :ref:`config_paths`).
415+
It might be more convenient to use the ``[run] relative_files``
416+
setting to store relative file paths (see :ref:`relative_files
417+
<config_run_relative_files>`).
418+
419+
If data isn't combining properly, you can see details about the inner workings
420+
with ``--debug=pathmap``.
421+
422+
417423
.. _cmd_erase:
418424

419425
Erase data: ``coverage erase``

doc/config.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -357,7 +357,7 @@ The first list that has a match will be used.
357357
The ``--debug=pathmap`` option can be used to log details of the re-mapping of
358358
paths. See :ref:`the --debug option <cmd_run_debug>`.
359359

360-
See :ref:`cmd_combine` for more information.
360+
See :ref:`cmd_combine_remapping` and :ref:`source_glob` for more information.
361361

362362

363363
.. _config_report:

doc/source.rst

+24-5
Original file line numberDiff line numberDiff line change
@@ -59,10 +59,10 @@ removed from the set.
5959

6060
.. highlight:: ini
6161

62-
The ``include`` and ``omit`` file name patterns follow typical shell syntax:
63-
``*`` matches any number of characters and ``?`` matches a single character.
64-
Patterns that start with a wildcard character are used as-is, other patterns
65-
are interpreted relative to the current directory::
62+
The ``include`` and ``omit`` file name patterns follow common shell syntax,
63+
described below in :ref:`source_glob`. Patterns that start with a wildcard
64+
character are used as-is, other patterns are interpreted relative to the
65+
current directory::
6666

6767
[run]
6868
omit =
@@ -77,7 +77,7 @@ The ``source``, ``include``, and ``omit`` values all work together to determine
7777
the source that will be measured.
7878

7979
If both ``source`` and ``include`` are set, the ``include`` value is ignored
80-
and a warning is printed on the standard output.
80+
and a warning is issued.
8181

8282

8383
.. _source_reporting:
@@ -103,3 +103,22 @@ reporting.
103103

104104
Note that these are ways of specifying files to measure. You can also exclude
105105
individual source lines. See :ref:`excluding` for details.
106+
107+
108+
.. _source_glob:
109+
110+
File patterns
111+
-------------
112+
113+
File path patterns are used for include and omit, and for re-mapping paths
114+
when combining data. They follow common shell syntax:
115+
116+
- ``*`` matches any number of file name characters, not including the directory
117+
separator.
118+
119+
- ``?`` matches a single file name character.
120+
121+
- ``**`` matches any number of nested directory names, including none.
122+
123+
- Both ``/`` and ``\`` will match either a slash or a backslash, to make
124+
cross-platform matching easier.

tests/test_api.py

-1
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ def test_unexecuted_file(self):
7171
assert missing == [1]
7272

7373
def test_filenames(self):
74-
7574
self.make_file("mymain.py", """\
7675
import mymod
7776
a = 1

0 commit comments

Comments
 (0)