Refactor test subsets in CI workflows (#4788)

michaelosthege · web-flow · commit 3edf1ae54e70 · 2021-06-23T14:10:41.000+02:00
* Analyze included/excluded test files based on YAML job matrix Compared to the previous regex-based approach, this can distinguish by OS and floatX setting, allowing for more informative outputs. * Merge Windows workflow with pytest workflow Closes #4517 * Run more Windows tests and use cmd shell But don't run tests_distributions.py on Windows because it runs into aesara-devs/aesara#485
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
@@ -5,26 +5,29 @@ on:
   push:
     branches: [main]
 
+
+# Tests are split into multiple jobs to accelerate the CI.
+# Different jobs should be organized to take approximately the same
+# time to complete (and not be prohibitely slow).
+# Because GitHub Actions don't support YAML anchors, we have to place the
+# splitting of testfiles into groups in the strategy/matrix/test-subset
+# and can't re-use the groups across jobs.
+# A pre-commit hook (scripts/check_all_tests_are_covered.py)
+# enforces that test run just once per OS / floatX setting.
+
 jobs:
-  pytest:
+  ubuntu:
     strategy:
       matrix:
         os: [ubuntu-18.04]
         floatx: [float32, float64]
         test-subset:
-        # Tests are split into multiple jobs to accelerate the CI.
-        # Different jobs should be organized to take approximately the same
-        # time to complete (and not be prohibitely slow)
-        #
         # How this works:
         # 1st block: Only passes --ignore parameters to pytest.
         # → pytest will run all test_*.py files that are NOT ignored.
+        #
         # Subsequent blocks: Only pass paths to test files.
         # → pytest will run only these files
-        #
-        # Any test that was not ignored runs in the first job.
-        # A pre-commit hook (scripts/check_all_tests_are_covered.py)
-        # enforces that test run just once.
           - |
             --ignore=pymc3/tests/test_distributions_timeseries.py
             --ignore=pymc3/tests/test_mixture.py
@@ -128,3 +131,71 @@ jobs:
           env_vars: OS,PYTHON
           name: codecov-umbrella
           fail_ci_if_error: false
+  windows:
+    strategy:
+      matrix:
+        os: [windows-latest]
+        floatx: [float32, float64]
+        test-subset:
+          - |
+            pymc3/tests/test_distributions_random.py
+          - |
+            pymc3/tests/test_sampling.py
+            pymc3/tests/test_shared.py
+          - |
+            pymc3/tests/test_gp.py
+            pymc3/tests/test_ode.py
+          - |
+            pymc3/tests/test_model.py
+            pymc3/tests/test_model_func.py
+            pymc3/tests/test_modelcontext.py
+            pymc3/tests/test_pickling.py
+
+      fail-fast: false
+    runs-on: ${{ matrix.os }}
+    env:
+      TEST_SUBSET: ${{ matrix.test-subset }}
+      AESARA_FLAGS: floatX=${{ matrix.floatx }},gcc__cxxflags='-march=core2'
+    defaults:
+      run:
+        shell: cmd
+    steps:
+      - uses: actions/checkout@v2
+      - name: Cache conda
+        uses: actions/cache@v1
+        env:
+          # Increase this value to reset cache if conda-envs/environment-dev-py38.yml has not changed
+          CACHE_NUMBER: 0
+        with:
+          path: ~/conda_pkgs_dir
+          key: ${{ runner.os }}-conda-${{ env.CACHE_NUMBER }}-${{
+            hashFiles('conda-envs/windows-environment-dev-py38.yml') }}
+      - name: Cache multiple paths
+        uses: actions/cache@v2
+        env:
+          # Increase this value to reset cache if requirements.txt has not changed
+          CACHE_NUMBER: 0
+        with:
+          path: |
+            ~/.cache/pip
+            $RUNNER_TOOL_CACHE/Python/*
+            ~\AppData\Local\pip\Cache
+          key: ${{ runner.os }}-build-${{ matrix.python-version }}-${{
+            hashFiles('requirements.txt') }}
+      - uses: conda-incubator/setup-miniconda@v2
+        with:
+          activate-environment: pymc3-dev-py38
+          channel-priority: strict
+          environment-file: conda-envs/windows-environment-dev-py38.yml
+          use-only-tar-bz2: true # IMPORTANT: This needs to be set for caching to work properly!
+      - name: Install-pymc3
+        run: |
+          conda activate pymc3-dev-py38
+          pip install -e .
+          python --version
+      - name: Run tests
+        # This job uses a cmd shell, therefore the environment variable syntax is different!
+        # The ">-" in the next line replaces newlines with spaces (see https://stackoverflow.com/a/66809682).
+        run: >-
+          conda activate pymc3-dev-py38 &&
+          python -m pytest -vv --cov=pymc3 --cov-report=xml --cov-report term --durations=50 %TEST_SUBSET%
diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -36,6 +36,7 @@ repos:
 - repo: local
   hooks:
     - id: check-no-tests-are-ignored
+      additional_dependencies: [pandas,pyyaml]
       entry: python scripts/check_all_tests_are_covered.py
       files: ^\.github/workflows/pytest\.yml$
       language: python
diff --git a/scripts/check_all_tests_are_covered.py b/scripts/check_all_tests_are_covered.py
@@ -6,38 +6,93 @@
 This is intended to be used as a pre-commit hook, see `.pre-commit-config.yaml`.
 You can run it manually with `pre-commit run check-no-tests-are-ignored --all`.
 """
+import itertools
 import logging
-import re
+import os
 
 from pathlib import Path
 
+import pandas
+import yaml
+
 _log = logging.getLogger(__file__)
+logging.basicConfig(level=logging.DEBUG)
 
 
-if __name__ == "__main__":
-    testing_workflows = ["jaxtests.yml", "pytest.yml"]
-    ignored = set()
-    non_ignored = set()
-    for wfyml in testing_workflows:
-        pytest_ci_job = Path(".github") / "workflows" / wfyml
-        txt = pytest_ci_job.read_text()
-        ignored = set(re.findall(r"(?<=--ignore=)(pymc3/tests.*\.py)", txt))
-        non_ignored = non_ignored.union(set(re.findall(r"(?<!--ignore=)(pymc3/tests.*\.py)", txt)))
-    # Summarize
-    ignored_by_all = ignored.difference(non_ignored)
-    run_multiple_times = non_ignored.difference(ignored)
+def find_testfiles():
+    dp_repo = Path(__file__).parent.parent
+    all_tests = {
+        str(fp.relative_to(dp_repo)).replace(os.sep, "/")
+        for fp in (dp_repo / "pymc3" / "tests").glob("**/test_*.py")
+    }
+    _log.info("Found %i tests in total.", len(all_tests))
+    return all_tests
+
+
+def from_yaml():
+    """Determins how often each test file is run per platform and floatX setting.
+
+    An exception is raised if tests run multiple times with the same configuration.
+    """
+    # First collect the matrix definitions from testing workflows
+    matrices = {}
+    for wf in ["pytest.yml", "arviz_compat.yml", "jaxtests.yml"]:
+        wfname = wf.strip(".yml")
+        wfdef = yaml.safe_load(open(Path(".github", "workflows", wf)))
+        for jobname, jobdef in wfdef["jobs"].items():
+            matrix = jobdef.get("strategy", {}).get("matrix", {})
+            if matrix:
+                matrices[(wfname, jobname)] = matrix
+            else:
+                _log.warning("No matrix in %s/%s", wf, jobname)
+
+    # Now create an empty DataFrame to count based on OS/floatX/testfile
+    all_os = []
+    all_floatX = []
+    for matrix in matrices.values():
+        all_os += matrix["os"]
+        all_floatX += matrix["floatx"]
+    all_os = tuple(sorted(set(all_os)))
+    all_floatX = tuple(sorted(set(all_floatX)))
+    all_tests = find_testfiles()
+
+    df = pandas.DataFrame(
+        columns=pandas.MultiIndex.from_product(
+            [sorted(all_floatX), sorted(all_os)], names=["floatX", "os"]
+        ),
+        index=pandas.Index(sorted(all_tests), name="testfile"),
+    )
+    df.loc[:, :] = 0
+
+    # Count how often the testfiles are included in job definitions
+    for matrix in matrices.values():
+        for os_, floatX, subset in itertools.product(
+            matrix["os"], matrix["floatx"], matrix["test-subset"]
+        ):
+            testfiles = subset.split("\n")
+            ignored = {item.strip("--ignore=") for item in testfiles if item.startswith("--ignore")}
+            included = {item for item in testfiles if item and not item.startswith("--ignore")}
+            if ignored and not included:
+                # if no testfile is specified explicitly pytest runs all except the ignored ones
+                included = all_tests - ignored
+
+            for testfile in included:
+                df.loc[testfile, (floatX, os_)] += 1
+
+    ignored_by_all = set(df[df.eq(0).all(axis=1)].index)
+    run_multiple_times = set(df[df.gt(1).any(axis=1)].index)
+
+    # Print summary, warnings and raise errors on unwanted configurations
+    _log.info("Number of test runs (❌=0, ✅=once)\n%s", df.replace(0, "❌").replace(1, "✅"))
 
     if ignored_by_all:
-        _log.warning(
-            f"The following {len(ignored_by_all)} tests are completely ignored: {ignored_by_all}"
-        )
+        _log.warning("%i tests are completely ignored:\n%s", len(ignored_by_all), ignored_by_all)
     if run_multiple_times:
-        _log.warning(
-            f"The following {len(run_multiple_times)} tests are run multiple times: {run_multiple_times}"
+        raise Exception(
+            f"{len(run_multiple_times)} tests are run multiple times with the same OS and floatX setting:\n{run_multiple_times}"
         )
-    if not (ignored_by_all or run_multiple_times):
-        print(f"✔ All tests will run exactly once.")
+    return
+
 
-    # Temporarily disabled as we're bringing features back for v4:
-    # assert not ignored_by_all
-    assert not run_multiple_times
+if __name__ == "__main__":
+    from_yaml()