Skip to content

Commit 3edf1ae

Browse files
Refactor test subsets in CI workflows (#4788)
* Analyze included/excluded test files based on YAML job matrix Compared to the previous regex-based approach, this can distinguish by OS and floatX setting, allowing for more informative outputs. * Merge Windows workflow with pytest workflow Closes #4517 * Run more Windows tests and use cmd shell But don't run tests_distributions.py on Windows because it runs into aesara-devs/aesara#485
1 parent b25d0e0 commit 3edf1ae

File tree

4 files changed

+159
-92
lines changed

4 files changed

+159
-92
lines changed

.github/workflows/pytest.yml

Lines changed: 80 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,26 +5,29 @@ on:
55
push:
66
branches: [main]
77

8+
9+
# Tests are split into multiple jobs to accelerate the CI.
10+
# Different jobs should be organized to take approximately the same
11+
# time to complete (and not be prohibitively slow).
12+
# Because GitHub Actions don't support YAML anchors, we have to place the
13+
# splitting of testfiles into groups in the strategy/matrix/test-subset
14+
# and can't re-use the groups across jobs.
15+
# A pre-commit hook (scripts/check_all_tests_are_covered.py)
16+
# enforces that each test runs just once per OS / floatX setting.
17+
818
jobs:
9-
pytest:
19+
ubuntu:
1020
strategy:
1121
matrix:
1222
os: [ubuntu-18.04]
1323
floatx: [float32, float64]
1424
test-subset:
15-
# Tests are split into multiple jobs to accelerate the CI.
16-
# Different jobs should be organized to take approximately the same
17-
# time to complete (and not be prohibitely slow)
18-
#
1925
# How this works:
2026
# 1st block: Only passes --ignore parameters to pytest.
2127
# → pytest will run all test_*.py files that are NOT ignored.
28+
#
2229
# Subsequent blocks: Only pass paths to test files.
2330
# → pytest will run only these files
24-
#
25-
# Any test that was not ignored runs in the first job.
26-
# A pre-commit hook (scripts/check_all_tests_are_covered.py)
27-
# enforces that test run just once.
2831
- |
2932
--ignore=pymc3/tests/test_distributions_timeseries.py
3033
--ignore=pymc3/tests/test_mixture.py
@@ -128,3 +131,71 @@ jobs:
128131
env_vars: OS,PYTHON
129132
name: codecov-umbrella
130133
fail_ci_if_error: false
134+
windows:
135+
strategy:
136+
matrix:
137+
os: [windows-latest]
138+
floatx: [float32, float64]
139+
test-subset:
140+
- |
141+
pymc3/tests/test_distributions_random.py
142+
- |
143+
pymc3/tests/test_sampling.py
144+
pymc3/tests/test_shared.py
145+
- |
146+
pymc3/tests/test_gp.py
147+
pymc3/tests/test_ode.py
148+
- |
149+
pymc3/tests/test_model.py
150+
pymc3/tests/test_model_func.py
151+
pymc3/tests/test_modelcontext.py
152+
pymc3/tests/test_pickling.py
153+
154+
fail-fast: false
155+
runs-on: ${{ matrix.os }}
156+
env:
157+
TEST_SUBSET: ${{ matrix.test-subset }}
158+
AESARA_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=core2'
159+
defaults:
160+
run:
161+
shell: cmd
162+
steps:
163+
- uses: actions/checkout@v2
164+
- name: Cache conda
165+
uses: actions/cache@v1
166+
env:
167+
# Increase this value to reset cache if conda-envs/windows-environment-dev-py38.yml has not changed
168+
CACHE_NUMBER: 0
169+
with:
170+
path: ~/conda_pkgs_dir
171+
key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{
172+
hashFiles('conda-envs/windows-environment-dev-py38.yml') }}
173+
- name: Cache multiple paths
174+
uses: actions/cache@v2
175+
env:
176+
# Increase this value to reset cache if requirements.txt has not changed
177+
CACHE_NUMBER: 0
178+
with:
179+
path: |
180+
~/.cache/pip
181+
$RUNNER_TOOL_CACHE/Python/*
182+
~\AppData\Local\pip\Cache
183+
key: ${{ runner.os }}-build-${{ matrix.python-version }}-${{
184+
hashFiles('requirements.txt') }}
185+
- uses: conda-incubator/setup-miniconda@v2
186+
with:
187+
activate-environment: pymc3-dev-py38
188+
channel-priority: strict
189+
environment-file: conda-envs/windows-environment-dev-py38.yml
190+
use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
191+
- name: Install-pymc3
192+
run: |
193+
conda activate pymc3-dev-py38
194+
pip install -e .
195+
python --version
196+
- name: Run tests
197+
# This job uses a cmd shell, therefore the environment variable syntax is different!
198+
# The ">-" in the next line replaces newlines with spaces (see https://stackoverflow.com/a/66809682).
199+
run: >-
200+
conda activate pymc3-dev-py38 &&
201+
python -m pytest -vv --cov=pymc3 --cov-report=xml --cov-report term --durations=50 %TEST_SUBSET%

.github/workflows/windows.yml

Lines changed: 0 additions & 60 deletions
This file was deleted.

.pre-commit-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ repos:
3636
- repo: local
3737
hooks:
3838
- id: check-no-tests-are-ignored
39+
additional_dependencies: [pandas,pyyaml]
3940
entry: python scripts/check_all_tests_are_covered.py
4041
files: ^\.github/workflows/pytest\.yml$
4142
language: python

scripts/check_all_tests_are_covered.py

Lines changed: 78 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -6,38 +6,93 @@
66
This is intended to be used as a pre-commit hook, see `.pre-commit-config.yaml`.
77
You can run it manually with `pre-commit run check-no-tests-are-ignored --all`.
88
"""
9+
import itertools
910
import logging
10-
import re
11+
import os
1112

1213
from pathlib import Path
1314

15+
import pandas
16+
import yaml
17+
1418
_log = logging.getLogger(__file__)
19+
logging.basicConfig(level=logging.DEBUG)
1520

1621

17-
if __name__ == "__main__":
18-
testing_workflows = ["jaxtests.yml", "pytest.yml"]
19-
ignored = set()
20-
non_ignored = set()
21-
for wfyml in testing_workflows:
22-
pytest_ci_job = Path(".github") / "workflows" / wfyml
23-
txt = pytest_ci_job.read_text()
24-
ignored = set(re.findall(r"(?<=--ignore=)(pymc3/tests.*\.py)", txt))
25-
non_ignored = non_ignored.union(set(re.findall(r"(?<!--ignore=)(pymc3/tests.*\.py)", txt)))
26-
# Summarize
27-
ignored_by_all = ignored.difference(non_ignored)
28-
run_multiple_times = non_ignored.difference(ignored)
22+
def find_testfiles():
    """Collect every ``test_*.py`` file below ``pymc3/tests``.

    Returns
    -------
    set of str
        Paths relative to the repository root (taken to be the parent of the
        directory containing this script), written with forward slashes.
    """
    repo_root = Path(__file__).parent.parent
    tests_dir = repo_root / "pymc3" / "tests"

    all_tests = set()
    for test_path in tests_dir.glob("**/test_*.py"):
        relative = str(test_path.relative_to(repo_root))
        # Use forward slashes on every platform, like the workflow YAML entries.
        all_tests.add(relative.replace(os.sep, "/"))

    _log.info("Found %i tests in total.", len(all_tests))
    return all_tests
30+
31+
32+
def _split_test_subset(subset):
    """Split one ``test-subset`` matrix entry into (ignored, included) path sets."""
    prefix = "--ignore="
    # Entries are newline-separated; tolerate stray whitespace and blank lines.
    items = [line.strip() for line in subset.splitlines()]
    # Slice the prefix off instead of ``str.strip("--ignore=")``: ``strip``
    # removes any of the given *characters* from both ends and can therefore
    # eat leading/trailing characters of the path itself.
    ignored = {item[len(prefix):] for item in items if item.startswith(prefix)}
    included = {item for item in items if item and not item.startswith("--ignore")}
    return ignored, included


def from_yaml():
    """Determine how often each test file is run per platform and floatX setting.

    The job matrices of the testing workflows are read from
    ``.github/workflows`` (relative to the current working directory) and the
    number of runs is counted per test file, OS and floatX setting.
    A summary table is logged, and a warning is logged for test files that
    are not run by any job.

    Raises
    ------
    Exception
        If tests run multiple times with the same configuration.
    """
    # First collect the matrix definitions from testing workflows
    matrices = {}
    for wf in ["pytest.yml", "arviz_compat.yml", "jaxtests.yml"]:
        # ``Path.stem`` drops the extension; ``str.strip(".yml")`` would strip
        # any of the characters ".", "y", "m", "l" from both ends of the name.
        wfname = Path(wf).stem
        # The workflow files contain non-ASCII characters, so don't rely on the
        # platform default encoding; the context manager closes the handle.
        with open(Path(".github", "workflows", wf), encoding="utf-8") as wffile:
            wfdef = yaml.safe_load(wffile)
        for jobname, jobdef in wfdef["jobs"].items():
            matrix = jobdef.get("strategy", {}).get("matrix", {})
            if matrix:
                matrices[(wfname, jobname)] = matrix
            else:
                _log.warning("No matrix in %s/%s", wf, jobname)

    # Now create a zero-filled DataFrame to count based on OS/floatX/testfile
    all_os = set()
    all_floatX = set()
    for matrix in matrices.values():
        all_os.update(matrix["os"])
        all_floatX.update(matrix["floatx"])
    all_tests = find_testfiles()

    df = pandas.DataFrame(
        0,
        columns=pandas.MultiIndex.from_product(
            [sorted(all_floatX), sorted(all_os)], names=["floatX", "os"]
        ),
        index=pandas.Index(sorted(all_tests), name="testfile"),
    )

    # Count how often the testfiles are included in job definitions
    for matrix in matrices.values():
        for os_, floatX, subset in itertools.product(
            matrix["os"], matrix["floatx"], matrix["test-subset"]
        ):
            ignored, included = _split_test_subset(subset)
            if ignored and not included:
                # if no testfile is specified explicitly pytest runs all except the ignored ones
                included = all_tests - ignored

            for testfile in included:
                df.loc[testfile, (floatX, os_)] += 1

    ignored_by_all = set(df[df.eq(0).all(axis=1)].index)
    run_multiple_times = set(df[df.gt(1).any(axis=1)].index)

    # Print summary, warnings and raise errors on unwanted configurations
    _log.info("Number of test runs (❌=0, ✅=once)\n%s", df.replace(0, "❌").replace(1, "✅"))

    if ignored_by_all:
        _log.warning("%i tests are completely ignored:\n%s", len(ignored_by_all), ignored_by_all)
    if run_multiple_times:
        raise Exception(
            f"{len(run_multiple_times)} tests are run multiple times with the same OS and floatX setting:\n{run_multiple_times}"
        )
95+
4096

41-
# Temporarily disabled as we're bringing features back for v4:
42-
# assert not ignored_by_all
43-
assert not run_multiple_times
97+
if __name__ == "__main__":
98+
from_yaml()

0 commit comments

Comments
 (0)