From 37ac83c91bf29447a559a34209984f578c6f8a87 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 19 Jan 2022 13:50:49 -0800 Subject: [PATCH 01/81] CI: Align Azure & GHA dependencies --- azure-pipelines.yml | 5 ++-- ci/azure/posix.yml | 36 +++++++--------------------- ci/azure/windows.yml | 43 ++++++---------------------------- ci/deps/azure-macos-310.yaml | 36 ---------------------------- ci/deps/azure-macos-38.yaml | 36 ---------------------------- ci/deps/azure-macos-39.yaml | 36 ---------------------------- ci/deps/azure-windows-310.yaml | 41 -------------------------------- ci/deps/azure-windows-38.yaml | 35 --------------------------- ci/deps/azure-windows-39.yaml | 40 ------------------------------- 9 files changed, 18 insertions(+), 290 deletions(-) delete mode 100644 ci/deps/azure-macos-310.yaml delete mode 100644 ci/deps/azure-macos-38.yaml delete mode 100644 ci/deps/azure-macos-39.yaml delete mode 100644 ci/deps/azure-windows-310.yaml delete mode 100644 ci/deps/azure-windows-38.yaml delete mode 100644 ci/deps/azure-windows-39.yaml diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 9c04d10707a64..66657ac6c9d54 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,16 +20,15 @@ variables: PYTEST_TARGET: pandas jobs: -# Mac and Linux use the same template - template: ci/azure/posix.yml parameters: name: macOS - vmImage: macOS-10.15 + vmImage: macOS-latest - template: ci/azure/windows.yml parameters: name: Windows - vmImage: windows-2019 + vmImage: windows-latest - job: py38_32bit pool: diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 02a4a9ad44865..4cd202c9f7b11 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -4,40 +4,22 @@ parameters: jobs: - job: ${{ parameters.name }} + timeoutInMinutes: 0 pool: vmImage: ${{ parameters.vmImage }} strategy: matrix: - py38_macos_1: - ENV_FILE: ci/deps/azure-macos-38.yaml + py38: + ENV_FILE: ci/deps/actions-38.yaml CONDA_PY: "38" - PATTERN: "not slow" - PYTEST_TARGET: "pandas/tests/[a-h]*" - py38_macos_2: - ENV_FILE: ci/deps/azure-macos-38.yaml - CONDA_PY: "38" - PATTERN: "not slow" - PYTEST_TARGET: "pandas/tests/[i-z]*" - py39_macos_1: - ENV_FILE: ci/deps/azure-macos-39.yaml - CONDA_PY: "39" - PATTERN: "not slow" - PYTEST_TARGET: "pandas/tests/[a-h]*" - py39_macos_2: - ENV_FILE: ci/deps/azure-macos-39.yaml + + py39: + ENV_FILE: ci/deps/actions-39.yaml CONDA_PY: "39" - PATTERN: "not slow" - PYTEST_TARGET: "pandas/tests/[i-z]*" - py310_macos_1: - ENV_FILE: ci/deps/azure-macos-310.yaml - CONDA_PY: "310" - PATTERN: "not slow" - PYTEST_TARGET: "pandas/tests/[a-h]*" - py310_macos_2: - ENV_FILE: ci/deps/azure-macos-310.yaml + + py310: + ENV_FILE: ci/deps/actions-310.yaml CONDA_PY: "310" - PATTERN: "not slow" - PYTEST_TARGET: "pandas/tests/[i-z]*" steps: - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin' diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index 7061a266f28c7..97fd8c34e02e6 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -4,51 +4,22 @@ parameters: jobs: - job: ${{ parameters.name }} + timeoutInMinutes: 0 pool: vmImage: ${{ parameters.vmImage }} strategy: matrix: - py38_np18_1: - ENV_FILE: ci/deps/azure-windows-38.yaml + py38: + ENV_FILE: ci/deps/actions-38.yaml CONDA_PY: "38" - PATTERN: "not slow" - PYTEST_WORKERS: 2 # GH-42236 - PYTEST_TARGET: "pandas/tests/[a-h]*" - py38_np18_2: - ENV_FILE: ci/deps/azure-windows-38.yaml - CONDA_PY: "38" - PATTERN: "not slow" - PYTEST_WORKERS: 2 # GH-42236 - PYTEST_TARGET: "pandas/tests/[i-z]*" - - py39_1: - ENV_FILE: ci/deps/azure-windows-39.yaml - CONDA_PY: "39" - PATTERN: "not slow and not high_memory" - PYTEST_WORKERS: 2 # GH-42236 - PYTEST_TARGET: "pandas/tests/[a-h]*" - - py39_2: - ENV_FILE: ci/deps/azure-windows-39.yaml + py39: + ENV_FILE: ci/deps/actions-39.yaml CONDA_PY: "39" - PATTERN: "not slow and not high_memory" - PYTEST_WORKERS: 2 # GH-42236 - PYTEST_TARGET: "pandas/tests/[i-z]*" - - py310_1: - ENV_FILE: ci/deps/azure-windows-310.yaml - CONDA_PY: "310" - PATTERN: "not slow and not high_memory" - PYTEST_WORKERS: 2 # GH-42236 - PYTEST_TARGET: "pandas/tests/[a-h]*" - py310_2: - ENV_FILE: ci/deps/azure-windows-310.yaml + py310: + ENV_FILE: ci/deps/actions-310.yaml CONDA_PY: "310" - PATTERN: "not slow and not high_memory" - PYTEST_WORKERS: 2 # GH-42236 - PYTEST_TARGET: "pandas/tests/[i-z]*" steps: - powershell: | diff --git a/ci/deps/azure-macos-310.yaml b/ci/deps/azure-macos-310.yaml deleted file mode 100644 index 312fac8091db6..0000000000000 --- a/ci/deps/azure-macos-310.yaml +++ /dev/null @@ -1,36 +0,0 @@ -name: pandas-dev -channels: - - defaults - - conda-forge -dependencies: - - python=3.10 - - # tools - - cython>=0.29.24 - - pytest>=6.0 - - pytest-xdist>=1.31 - - hypothesis>=5.5.3 - - pytest-azurepipelines - - # pandas dependencies - - beautifulsoup4 - - bottleneck - - html5lib - - jinja2 - - lxml - - matplotlib - - nomkl - - numexpr - - numpy - - openpyxl - - pyarrow - - pyreadstat - - pytables - - python-dateutil==2.8.1 - - pytz - - pyxlsb - - xarray - - xlrd - - xlsxwriter - - xlwt - - zstandard diff --git a/ci/deps/azure-macos-38.yaml b/ci/deps/azure-macos-38.yaml deleted file mode 100644 index 422aa86c57fc7..0000000000000 --- a/ci/deps/azure-macos-38.yaml +++ /dev/null @@ -1,36 +0,0 @@ -name: pandas-dev -channels: - - defaults - - conda-forge -dependencies: - - python=3.8 - - # tools - - cython>=0.29.24 - - pytest>=6.0 - - pytest-xdist>=1.31 - - hypothesis>=5.5.3 - - pytest-azurepipelines - - # pandas dependencies - - beautifulsoup4 - - bottleneck - - html5lib - - jinja2 - - lxml - - matplotlib=3.3.2 - - nomkl - - numexpr - - numpy=1.18.5 - - openpyxl - - pyarrow=1.0.1 - - pyreadstat - - pytables - - python-dateutil==2.8.1 - - pytz - - pyxlsb - - xarray - - xlrd - - xlsxwriter - - xlwt - - zstandard diff --git a/ci/deps/azure-macos-39.yaml b/ci/deps/azure-macos-39.yaml deleted file mode 100644 index 140d67796452c..0000000000000 --- a/ci/deps/azure-macos-39.yaml +++ /dev/null @@ -1,36 +0,0 @@ -name: pandas-dev -channels: - - defaults - - conda-forge -dependencies: - - python=3.9 - - # tools - - cython>=0.29.24 - - pytest>=6.0 - - pytest-xdist>=1.31 - - hypothesis>=5.5.3 - - pytest-azurepipelines - - # pandas dependencies - - beautifulsoup4 - - bottleneck - - html5lib - - jinja2 - - lxml - - matplotlib=3.3.2 - - nomkl - - numexpr - - numpy=1.21.3 - - openpyxl - - pyarrow=4 - - pyreadstat - - pytables - - python-dateutil==2.8.1 - - pytz - - pyxlsb - - xarray - - xlrd - - xlsxwriter - - xlwt - - zstandard diff --git a/ci/deps/azure-windows-310.yaml b/ci/deps/azure-windows-310.yaml deleted file mode 100644 index 8e6f4deef6057..0000000000000 --- a/ci/deps/azure-windows-310.yaml +++ /dev/null @@ -1,41 +0,0 @@ -name: pandas-dev -channels: - - conda-forge - - defaults -dependencies: - - python=3.10 - - # tools - - cython>=0.29.24 - - pytest>=6.0 - - pytest-xdist>=1.31 - - hypothesis>=5.5.3 - - pytest-azurepipelines - - # pandas dependencies - - beautifulsoup4 - - bottleneck - - fsspec>=0.8.0 - - gcsfs - - html5lib - - jinja2 - - lxml - - matplotlib - # TODO: uncomment after numba supports py310 - #- numba - - numexpr - - numpy - - openpyxl - - pyarrow - - pytables - - python-dateutil - - pytz - - s3fs>=0.4.2 - - scipy - - sqlalchemy - - xlrd - - xlsxwriter - - xlwt - - pyreadstat - - pyxlsb - - zstandard diff --git a/ci/deps/azure-windows-38.yaml b/ci/deps/azure-windows-38.yaml deleted file mode 100644 index eb533524147d9..0000000000000 --- a/ci/deps/azure-windows-38.yaml +++ /dev/null @@ -1,35 +0,0 @@ -name: pandas-dev -channels: - - conda-forge - - defaults -dependencies: - - python=3.8 - - # tools - - cython>=0.29.24 - - pytest>=6.0 - - pytest-xdist>=1.31 - - hypothesis>=5.5.3 - - pytest-azurepipelines - - # pandas dependencies - - blosc - - bottleneck - - fastparquet>=0.4.0 - - fsspec>=0.8.0 - - matplotlib=3.3.2 - - numba - - numexpr - - numpy=1.18 - - openpyxl - - jinja2 - - pyarrow=2 - - pytables - - python-dateutil - - pytz - - s3fs>=0.4.0 - - scipy - - xlrd - - xlsxwriter - - xlwt - - zstandard diff --git a/ci/deps/azure-windows-39.yaml b/ci/deps/azure-windows-39.yaml deleted file mode 100644 index 6f820b1c2aedb..0000000000000 --- a/ci/deps/azure-windows-39.yaml +++ /dev/null @@ -1,40 +0,0 @@ -name: pandas-dev -channels: - - conda-forge - - defaults -dependencies: - - python=3.9 - - # tools - - cython>=0.29.24 - - pytest>=6.0 - - pytest-xdist>=1.31 - - hypothesis>=5.5.3 - - pytest-azurepipelines - - # pandas dependencies - - beautifulsoup4 - - bottleneck - - fsspec>=0.8.0 - - gcsfs - - html5lib - - jinja2 - - lxml - - matplotlib - - numba - - numexpr - - numpy - - openpyxl - - pyarrow=6 - - pytables - - python-dateutil - - pytz - - s3fs>=0.4.2 - - scipy - - sqlalchemy - - xlrd - - xlsxwriter - - xlwt - - pyreadstat - - pyxlsb - - zstandard From 3327cc350985d5563349d1005e6f9453d36efafd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 19 Jan 2022 13:55:00 -0800 Subject: [PATCH 02/81] Disable GHA for testing Azure --- .github/workflows/code-checks.yml | 3 +++ .github/workflows/datamanger.yml | 1 + .github/workflows/posix.yml | 1 + .github/workflows/sdist.yml | 1 + 4 files changed, 6 insertions(+) diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 7141b02cac376..a54e841535b78 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -16,6 +16,7 @@ env: jobs: pre_commit: + if: false name: pre-commit runs-on: ubuntu-latest concurrency: @@ -35,6 +36,7 @@ jobs: uses: pre-commit/action@v2.0.3 typing_and_docstring_validation: + if: false name: Docstring and typing validation runs-on: ubuntu-latest defaults: @@ -101,6 +103,7 @@ jobs: if: ${{ steps.build.outcome == 'success' }} asv-benchmarks: + if: false name: ASV Benchmarks runs-on: ubuntu-latest defaults: diff --git a/.github/workflows/datamanger.yml b/.github/workflows/datamanger.yml index 3fc515883a225..fa66e516723f1 100644 --- a/.github/workflows/datamanger.yml +++ b/.github/workflows/datamanger.yml @@ -16,6 +16,7 @@ env: jobs: data_manager: + if: false name: Test experimental data manager runs-on: ubuntu-latest services: diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 135ca0703de8b..01e255f1f7293 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -18,6 +18,7 @@ env: jobs: pytest: + if: false runs-on: ubuntu-latest defaults: run: diff --git a/.github/workflows/sdist.yml b/.github/workflows/sdist.yml index dd030f1aacc44..ca2e329436ae4 100644 --- a/.github/workflows/sdist.yml +++ b/.github/workflows/sdist.yml @@ -14,6 +14,7 @@ on: jobs: build: + if: false runs-on: ubuntu-latest timeout-minutes: 60 defaults: From 7d87367d787ec34aae60448b5d98886ff00c172c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 19 Jan 2022 14:27:31 -0800 Subject: [PATCH 03/81] Disable another GHA workflow for Azure testing --- .github/workflows/docbuild-and-upload.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml index e8ed6d4545194..5b167c290a9fb 100644 --- a/.github/workflows/docbuild-and-upload.yml +++ b/.github/workflows/docbuild-and-upload.yml @@ -16,6 +16,7 @@ env: jobs: web_and_docs: + if: false name: Doc Build and Upload runs-on: ubuntu-latest From 53908b5cbaa9fcf759ed0aaab92d05cf26359c97 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 19 Jan 2022 14:29:09 -0800 Subject: [PATCH 04/81] Fix windows workflow --- ci/azure/windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index 97fd8c34e02e6..b2e716176c196 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -30,7 +30,7 @@ jobs: displayName: 'Update conda' - bash: | - conda env create -q --file ci\\deps\\azure-windows-$(CONDA_PY).yaml + conda env create -q --file ci\\deps\\actions-$(CONDA_PY).yaml displayName: 'Create anaconda environment' - bash: | source activate pandas-dev From 0d3bfb14e2c939ed0f0edff2edb575da6962f78b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 23 Jan 2022 17:38:24 -0800 Subject: [PATCH 05/81] Cap at 120 mins --- ci/azure/posix.yml | 2 +- ci/azure/windows.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 4cd202c9f7b11..df072618d6ad0 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -4,7 +4,7 @@ parameters: jobs: - job: ${{ parameters.name }} - timeoutInMinutes: 0 + timeoutInMinutes: 90 pool: vmImage: ${{ parameters.vmImage }} strategy: diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index b2e716176c196..240423dab4a4f 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -4,7 +4,7 @@ parameters: jobs: - job: ${{ parameters.name }} - timeoutInMinutes: 0 + timeoutInMinutes: 120 pool: vmImage: ${{ parameters.vmImage }} strategy: From ff6e2da6a6cc6bec8941a6c5185810e2618e66f5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 23 Jan 2022 17:38:46 -0800 Subject: [PATCH 06/81] Verbose --- ci/run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 203f8fe293a06..114a21a5f3f6c 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -24,7 +24,7 @@ if [[ $(uname) == "Linux" && -z $DISPLAY ]]; then XVFB="xvfb-run " fi -PYTEST_CMD="${XVFB}pytest -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" +PYTEST_CMD="${XVFB}pytest -v -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" if [[ $(uname) != "Linux" && $(uname) != "Darwin" ]]; then PYTEST_CMD="$PYTEST_CMD --ignore=pandas/tests/plotting/" From c49e684080911f3953c25483a07aad76ca39b19e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 23 Jan 2022 19:56:09 -0800 Subject: [PATCH 07/81] Align pattern and timeoutinMinutes --- azure-pipelines.yml | 1 + ci/azure/windows.yml | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 1d22c5f2f2d07..c9e5facd545c9 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -18,6 +18,7 @@ pr: variables: PYTEST_WORKERS: auto PYTEST_TARGET: pandas + PATTERN: "not slow and not high_memory" jobs: - template: ci/azure/posix.yml diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index 240423dab4a4f..45b55ef0fac05 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -4,7 +4,7 @@ parameters: jobs: - job: ${{ parameters.name }} - timeoutInMinutes: 120 + timeoutInMinutes: 90 pool: vmImage: ${{ parameters.vmImage }} strategy: From 20dcc494a65568a23ce85da10ad18ec4428b0cec Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 23 Jan 2022 22:06:51 -0800 Subject: [PATCH 08/81] Skip db as well --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index c9e5facd545c9..bb8de43623053 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -18,7 +18,7 @@ pr: variables: PYTEST_WORKERS: auto PYTEST_TARGET: pandas - PATTERN: "not slow and not high_memory" + PATTERN: "not slow and not high_memory and not db" jobs: - template: ci/azure/posix.yml From e6db0ed511c6ce0b11cb9a37c3f7b010b4eccc28 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 24 Jan 2022 10:30:42 -0800 Subject: [PATCH 09/81] Mark more db tests --- pandas/tests/io/test_sql.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 741af4324c1a6..584308db3bae8 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -498,6 +498,7 @@ def sqlite_buildin_iris(sqlite_buildin, iris_path): all_connectable_iris = sqlalchemy_connectable_iris + ["sqlite_buildin_iris"] +@pytest.mark.db @pytest.mark.parametrize("conn", all_connectable) @pytest.mark.parametrize("method", [None, "multi"]) def test_to_sql(conn, method, test_frame1, request): @@ -508,6 +509,7 @@ def test_to_sql(conn, method, test_frame1, request): assert count_rows(conn, "test_frame") == len(test_frame1) +@pytest.mark.db @pytest.mark.parametrize("conn", all_connectable) @pytest.mark.parametrize("mode, num_row_coef", [("replace", 1), ("append", 2)]) def test_to_sql_exist(conn, mode, num_row_coef, test_frame1, request): @@ -519,6 +521,7 @@ def test_to_sql_exist(conn, mode, num_row_coef, test_frame1, request): assert count_rows(conn, "test_frame") == num_row_coef * len(test_frame1) +@pytest.mark.db @pytest.mark.parametrize("conn", all_connectable) def test_to_sql_exist_fail(conn, test_frame1, request): conn = request.getfixturevalue(conn) @@ -531,6 +534,7 @@ def test_to_sql_exist_fail(conn, test_frame1, request): pandasSQL.to_sql(test_frame1, "test_frame", if_exists="fail") +@pytest.mark.db @pytest.mark.parametrize("conn", all_connectable_iris) def test_read_iris(conn, request): conn = request.getfixturevalue(conn) @@ -539,6 +543,7 @@ def test_read_iris(conn, request): check_iris_frame(iris_frame) +@pytest.mark.db @pytest.mark.parametrize("conn", sqlalchemy_connectable) def test_to_sql_callable(conn, test_frame1, request): conn = request.getfixturevalue(conn) @@ -557,6 +562,7 @@ def sample(pd_table, conn, keys, data_iter): assert count_rows(conn, "test_frame") == len(test_frame1) +@pytest.mark.db @pytest.mark.parametrize("conn", mysql_connectable) def test_default_type_conversion(conn, request): conn = request.getfixturevalue(conn) @@ -575,6 +581,7 @@ def test_default_type_conversion(conn, request): assert issubclass(df.BoolColWithNull.dtype.type, np.floating) +@pytest.mark.db @pytest.mark.parametrize("conn", mysql_connectable) def test_read_procedure(conn, request): conn = request.getfixturevalue(conn) @@ -611,6 +618,7 @@ def test_read_procedure(conn, request): tm.assert_frame_equal(df, res2) +@pytest.mark.db @pytest.mark.parametrize("conn", postgresql_connectable) def test_copy_from_callable_insertion_method(conn, request): # GH 8953 From aa2e03a451444ff0b36c364555b033e4529b0e50 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 24 Jan 2022 18:04:43 -0800 Subject: [PATCH 10/81] Skip moments tests on windows --- pandas/tests/window/moments/__init__.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/window/moments/__init__.py b/pandas/tests/window/moments/__init__.py index e69de29bb2d1d..dd58292791e02 100644 --- a/pandas/tests/window/moments/__init__.py +++ b/pandas/tests/window/moments/__init__.py @@ -0,0 +1,10 @@ +import os + +import pytest + +from pandas.compat import is_platform_windows + +pytestmark = pytest.mark.skipif( + os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), + reason="Causes flaky timeouts possibly due to test teardown in the CI", +) From 0bb104f44bb80891bdf9bf4dfc20bf42962f3367 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 24 Jan 2022 20:48:46 -0800 Subject: [PATCH 11/81] Add PANDAS_CI in azure-pipelines.yml --- azure-pipelines.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index bb8de43623053..f597f7dd4f672 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -19,6 +19,7 @@ variables: PYTEST_WORKERS: auto PYTEST_TARGET: pandas PATTERN: "not slow and not high_memory and not db" + PANDAS_CI: 1 jobs: - template: ci/azure/posix.yml From e2753434cb9a3295d13c7571718926eda8702b7d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 24 Jan 2022 20:53:55 -0800 Subject: [PATCH 12/81] Remove unused fixture which causes hang? --- pandas/tests/io/excel/test_readers.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 589c98721f139..24537f538d8fc 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -150,9 +150,7 @@ def parser(self, *args, **kwargs): expected = expected_defaults[read_ext[1:]] assert result == expected - def test_usecols_int(self, read_ext, df_ref): - df_ref = df_ref.reindex(columns=["A", "B", "C"]) - + def test_usecols_int(self, read_ext): # usecols as int msg = "Passing an integer for `usecols`" with pytest.raises(ValueError, match=msg): From 53340a5e79369beb4e1c605798f788568d41b98e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 25 Jan 2022 08:52:52 -0800 Subject: [PATCH 13/81] Try this condiditon --- pandas/tests/window/moments/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/window/moments/__init__.py b/pandas/tests/window/moments/__init__.py index dd58292791e02..da55015b4057d 100644 --- a/pandas/tests/window/moments/__init__.py +++ b/pandas/tests/window/moments/__init__.py @@ -1,10 +1,8 @@ -import os - import pytest from pandas.compat import is_platform_windows pytestmark = pytest.mark.skipif( - os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), + is_platform_windows(), reason="Causes flaky timeouts possibly due to test teardown in the CI", ) From 97e04cd95ee75d431758b98fa99e19c9460faf60 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 25 Jan 2022 08:56:11 -0800 Subject: [PATCH 14/81] Actually is it numba on windows? --- pandas/tests/window/moments/__init__.py | 4 +++- pandas/tests/window/test_numba.py | 8 ++++++++ pandas/tests/window/test_online.py | 8 ++++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/pandas/tests/window/moments/__init__.py b/pandas/tests/window/moments/__init__.py index da55015b4057d..dd58292791e02 100644 --- a/pandas/tests/window/moments/__init__.py +++ b/pandas/tests/window/moments/__init__.py @@ -1,8 +1,10 @@ +import os + import pytest from pandas.compat import is_platform_windows pytestmark = pytest.mark.skipif( - is_platform_windows(), + os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), reason="Causes flaky timeouts possibly due to test teardown in the CI", ) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index a14515ca9c018..572261cd68d88 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -1,6 +1,9 @@ +import os + import numpy as np import pytest +from pandas.compat import is_platform_windows from pandas.errors import NumbaUtilError import pandas.util._test_decorators as td @@ -13,6 +16,11 @@ import pandas._testing as tm from pandas.core.util.numba_ import NUMBA_FUNC_CACHE +pytestmark = pytest.mark.skipif( + os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), + reason="Causes flaky timeouts possibly due to test teardown in the CI", +) + @td.skip_if_no("numba") @pytest.mark.filterwarnings("ignore:\n") diff --git a/pandas/tests/window/test_online.py b/pandas/tests/window/test_online.py index 80cf1c55958ee..20512aed2258a 100644 --- a/pandas/tests/window/test_online.py +++ b/pandas/tests/window/test_online.py @@ -1,6 +1,9 @@ +import os + import numpy as np import pytest +from pandas.compat import is_platform_windows import pandas.util._test_decorators as td from pandas import ( @@ -9,6 +12,11 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.skipif( + os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), + reason="Causes flaky timeouts possibly due to test teardown in the CI", +) + @td.skip_if_no("numba") @pytest.mark.filterwarnings("ignore:\n") From 0b95106a839026665a6eab43473ac2bea4098017 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 25 Jan 2022 12:00:52 -0800 Subject: [PATCH 15/81] Don't even collect moments tests --- ci/run_tests.sh | 2 +- pandas/tests/window/test_numba.py | 8 -------- pandas/tests/window/test_online.py | 8 -------- 3 files changed, 1 insertion(+), 17 deletions(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 114a21a5f3f6c..239e74ae4b81d 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -27,7 +27,7 @@ fi PYTEST_CMD="${XVFB}pytest -v -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" if [[ $(uname) != "Linux" && $(uname) != "Darwin" ]]; then - PYTEST_CMD="$PYTEST_CMD --ignore=pandas/tests/plotting/" + PYTEST_CMD="$PYTEST_CMD --ignore=pandas/tests/window/moments/ --ignore=pandas/tests/plotting/" fi echo $PYTEST_CMD diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 572261cd68d88..a14515ca9c018 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -1,9 +1,6 @@ -import os - import numpy as np import pytest -from pandas.compat import is_platform_windows from pandas.errors import NumbaUtilError import pandas.util._test_decorators as td @@ -16,11 +13,6 @@ import pandas._testing as tm from pandas.core.util.numba_ import NUMBA_FUNC_CACHE -pytestmark = pytest.mark.skipif( - os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), - reason="Causes flaky timeouts possibly due to test teardown in the CI", -) - @td.skip_if_no("numba") @pytest.mark.filterwarnings("ignore:\n") diff --git a/pandas/tests/window/test_online.py b/pandas/tests/window/test_online.py index 20512aed2258a..80cf1c55958ee 100644 --- a/pandas/tests/window/test_online.py +++ b/pandas/tests/window/test_online.py @@ -1,9 +1,6 @@ -import os - import numpy as np import pytest -from pandas.compat import is_platform_windows import pandas.util._test_decorators as td from pandas import ( @@ -12,11 +9,6 @@ ) import pandas._testing as tm -pytestmark = pytest.mark.skipif( - os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), - reason="Causes flaky timeouts possibly due to test teardown in the CI", -) - @td.skip_if_no("numba") @pytest.mark.filterwarnings("ignore:\n") From 02cb9c21fd87d1484896a423d461c00de7137c28 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 25 Jan 2022 15:03:22 -0800 Subject: [PATCH 16/81] Exclude numba tests --- ci/run_tests.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 239e74ae4b81d..b7aa6be0cef68 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -27,7 +27,9 @@ fi PYTEST_CMD="${XVFB}pytest -v -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" if [[ $(uname) != "Linux" && $(uname) != "Darwin" ]]; then - PYTEST_CMD="$PYTEST_CMD --ignore=pandas/tests/window/moments/ --ignore=pandas/tests/plotting/" + # Windows can hang during pytest teardown step for window/moments tests + # Windows can crash for windows/numba tests + PYTEST_CMD="$PYTEST_CMD --ignore=pandas/tests/window/moments/ --ignore=pandas/tests/window/test_numba.py --ignore=pandas/tests/window/test_online.py --ignore=pandas/tests/plotting/" fi echo $PYTEST_CMD From 63015f6bba56bbc6dec9dd13834e8bc91c790d49 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 25 Jan 2022 20:21:24 -0800 Subject: [PATCH 17/81] Maybe use a context manager? --- pandas/tests/io/excel/test_readers.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 24537f538d8fc..117ae8eb1f099 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -131,8 +131,6 @@ def test_engine_used(self, read_ext, engine, monkeypatch): def parser(self, *args, **kwargs): return self.engine - monkeypatch.setattr(pd.ExcelFile, "parse", parser) - expected_defaults = { "xlsx": "openpyxl", "xlsm": "openpyxl", @@ -140,9 +138,10 @@ def parser(self, *args, **kwargs): "xls": "xlrd", "ods": "odf", } - - with open("test1" + read_ext, "rb") as f: - result = pd.read_excel(f) + with monkeypatch.context() as m: + m.setattr(pd.ExcelFile, "parse", parser) + with open("test1" + read_ext, "rb") as f: + result = pd.read_excel(f) if engine is not None: expected = engine From 16cf72d4285899000ef3607c096f1ababe019c90 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 25 Jan 2022 20:24:30 -0800 Subject: [PATCH 18/81] Maybe longer timeout for windows? --- ci/azure/windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index 45b55ef0fac05..240423dab4a4f 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -4,7 +4,7 @@ parameters: jobs: - job: ${{ parameters.name }} - timeoutInMinutes: 90 + timeoutInMinutes: 120 pool: vmImage: ${{ parameters.vmImage }} strategy: From dda299d162c4c1c632ba178c4ee34b5fcf4d04d4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 25 Jan 2022 22:32:33 -0800 Subject: [PATCH 19/81] Already skipping in ci/run_tests --- pandas/tests/window/moments/__init__.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/pandas/tests/window/moments/__init__.py b/pandas/tests/window/moments/__init__.py index dd58292791e02..e69de29bb2d1d 100644 --- a/pandas/tests/window/moments/__init__.py +++ b/pandas/tests/window/moments/__init__.py @@ -1,10 +0,0 @@ -import os - -import pytest - -from pandas.compat import is_platform_windows - -pytestmark = pytest.mark.skipif( - os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), - reason="Causes flaky timeouts possibly due to test teardown in the CI", -) From 714125859d6bc91cb32de0ad23fffe91ad2de70f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 25 Jan 2022 22:33:06 -0800 Subject: [PATCH 20/81] Windows takes a while --- ci/azure/windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index 240423dab4a4f..a506443827e88 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -4,7 +4,7 @@ parameters: jobs: - job: ${{ parameters.name }} - timeoutInMinutes: 120 + timeoutInMinutes: 180 pool: vmImage: ${{ parameters.vmImage }} strategy: From 26221a80de8aed0955075fa241e5af4e44c301f0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 26 Jan 2022 09:57:29 -0800 Subject: [PATCH 21/81] Skip hanging test on Windows --- pandas/tests/io/excel/test_readers.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 117ae8eb1f099..c228477063c4b 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -11,6 +11,7 @@ import numpy as np import pytest +from pandas.compat import is_platform_windows import pandas.util._test_decorators as td import pandas as pd @@ -126,6 +127,10 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch): monkeypatch.chdir(datapath("io", "data", "excel")) monkeypatch.setattr(pd, "read_excel", func) + @pytest.mark.skipif( + os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), + reason="Flakily hangs on multi-process CI Windows environment", + ) def test_engine_used(self, read_ext, engine, monkeypatch): # GH 38884 def parser(self, *args, **kwargs): @@ -1308,7 +1313,7 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch): monkeypatch.chdir(datapath("io", "data", "excel")) monkeypatch.setattr(pd, "ExcelFile", func) - def test_engine_used(self, read_ext, engine, monkeypatch): + def test_engine_used(self, read_ext, engine): expected_defaults = { "xlsx": "openpyxl", "xlsm": "openpyxl", From 9aa05cefc8453e0791cd4a5a723ee21c0469f11f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 26 Jan 2022 14:38:44 -0800 Subject: [PATCH 22/81] Is this file just flaky for windows? --- ci/azure/windows.yml | 2 +- pandas/tests/io/excel/test_readers.py | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index 3df45532c3d32..39660ac29ec34 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -4,7 +4,7 @@ parameters: jobs: - job: ${{ parameters.name }} - timeoutInMinutes: 180 + timeoutInMinutes: 150 pool: vmImage: ${{ parameters.vmImage }} strategy: diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index c228477063c4b..93eb121384ac5 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -154,6 +154,10 @@ def parser(self, *args, **kwargs): expected = expected_defaults[read_ext[1:]] assert result == expected + @pytest.mark.skipif( + os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), + reason="Flakily hangs on multi-process CI Windows environment", + ) def test_usecols_int(self, read_ext): # usecols as int msg = "Passing an integer for `usecols`" From ec6a401c84c4208ce38e84d7df60dd6c13ce7a01 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 26 Jan 2022 16:07:22 -0800 Subject: [PATCH 23/81] Don't make check_for_file_leaks raise on non-excel files --- pandas/tests/io/excel/conftest.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/excel/conftest.py b/pandas/tests/io/excel/conftest.py index 0455e0d61ad97..b42263edb89b1 100644 --- a/pandas/tests/io/excel/conftest.py +++ b/pandas/tests/io/excel/conftest.py @@ -58,8 +58,12 @@ def check_for_file_leaks(): yield else: + exts = [".xls", ".xlsx", ".xlsm", ".ods", ".xlsb"] proc = psutil.Process() flist = proc.open_files() yield - flist2 = proc.open_files() + # Only care about excel files in this conftest + flist2 = [ + f for f in proc.open_files() if any(f.path.endswith(ext) for ext in exts) + ] assert flist == flist2 From dfa137ed6fff6f8004bafc247fa8368af4286e70 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 26 Jan 2022 19:21:34 -0800 Subject: [PATCH 24/81] Check excel files on both sides --- pandas/tests/io/excel/conftest.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/excel/conftest.py b/pandas/tests/io/excel/conftest.py index b42263edb89b1..9a376975141cc 100644 --- a/pandas/tests/io/excel/conftest.py +++ b/pandas/tests/io/excel/conftest.py @@ -58,11 +58,13 @@ def check_for_file_leaks(): yield else: + # Only care about excel files in this conftest exts = [".xls", ".xlsx", ".xlsm", ".ods", ".xlsb"] proc = psutil.Process() - flist = proc.open_files() + flist = [ + f for f in proc.open_files() if any(f.path.endswith(ext) for ext in exts) + ] yield - # Only care about excel files in this conftest flist2 = [ f for f in proc.open_files() if any(f.path.endswith(ext) for ext in exts) ] From d08ed001f72f08947cd6f325778a8724d23bb4da Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 26 Jan 2022 22:31:13 -0800 Subject: [PATCH 25/81] this entire file is sus --- pandas/tests/io/excel/test_readers.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 93eb121384ac5..ee5cf7270d564 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -25,6 +25,12 @@ from pandas.tests.io.excel import xlrd_version from pandas.util.version import Version +pytestmark = pytest.mark.skipif( + os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), + reason="Any test in this file can hang on the multi-process " + "CI Windows environment", +) + read_ext_params = [".xls", ".xlsx", ".xlsm", ".xlsb", ".ods"] engine_params = [ # Add any engines to test here @@ -127,10 +133,6 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch): monkeypatch.chdir(datapath("io", "data", "excel")) monkeypatch.setattr(pd, "read_excel", func) - @pytest.mark.skipif( - os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), - reason="Flakily hangs on multi-process CI Windows environment", - ) def test_engine_used(self, read_ext, engine, monkeypatch): # GH 38884 def parser(self, *args, **kwargs): @@ -154,10 +156,6 @@ def parser(self, *args, **kwargs): expected = expected_defaults[read_ext[1:]] assert result == expected - @pytest.mark.skipif( - os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), - reason="Flakily hangs on multi-process CI Windows environment", - ) def test_usecols_int(self, read_ext): # usecols as int msg = "Passing an integer for `usecols`" From dcb30288121a893e49497ea32b6ec13278d128c6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 26 Jan 2022 22:44:40 -0800 Subject: [PATCH 26/81] Will it ever time out? --- ci/azure/windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index 39660ac29ec34..ed52cad7f0bd0 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -4,7 +4,7 @@ parameters: jobs: - job: ${{ parameters.name }} - timeoutInMinutes: 150 + timeoutInMinutes: 0 pool: vmImage: ${{ parameters.vmImage }} strategy: From abcd626b5664b5f7862b474c6d90699c461583cf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 27 Jan 2022 09:24:12 -0800 Subject: [PATCH 27/81] Exclude excel test_writers --- pandas/tests/io/excel/test_writers.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 6f06ef9c09e52..70cce7108eb6e 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -11,6 +11,7 @@ import numpy as np import pytest +from pandas.compat import is_platform_windows import pandas.util._test_decorators as td import pandas as pd @@ -31,6 +32,12 @@ register_writer, ) +pytestmark = pytest.mark.skipif( + os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), + reason="Any test in this file can hang on the multi-process " + "CI Windows environment", +) + @pytest.fixture def path(ext): From d5f6b0e031a19c6f5755d72fe57301d00843481e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 27 Jan 2022 09:25:30 -0800 Subject: [PATCH 28/81] 75 min timeout --- ci/azure/windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index ed52cad7f0bd0..46089fa1b4857 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -4,7 +4,7 @@ parameters: jobs: - job: ${{ parameters.name }} - timeoutInMinutes: 0 + timeoutInMinutes: 75 pool: vmImage: ${{ parameters.vmImage }} strategy: From 221f52a37821da9731c92d93e0119a6e197e7635 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 27 Jan 2022 13:30:26 -0800 Subject: [PATCH 29/81] Disable file leak check for windows: --- pandas/util/_test_decorators.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 33bde4e69b042..e4c127b0ffda9 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -260,7 +260,8 @@ def file_leak_context(): ContextManager analogue to check_file_leaks. """ psutil = safe_import("psutil") - if not psutil: + if not psutil or is_platform_windows(): + # Windows grabs system files we're not interested in yield else: proc = psutil.Process() From a1929abe4bdae7a693bbee4569b97b7843ae2eb3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 27 Jan 2022 13:38:50 -0800 Subject: [PATCH 30/81] Give windows more time --- ci/azure/windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index 46089fa1b4857..3b489d92388da 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -4,7 +4,7 @@ parameters: jobs: - job: ${{ parameters.name }} - timeoutInMinutes: 75 + timeoutInMinutes: 90 pool: vmImage: ${{ parameters.vmImage }} strategy: From 2bab2d3fa4b0d049b7db5fd16d451863d66f4367 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 27 Jan 2022 15:26:21 -0800 Subject: [PATCH 31/81] Move skipping to tests so they are logged --- ci/run_tests.sh | 6 ------ pandas/tests/plotting/__init__.py | 11 +++++++++++ pandas/tests/window/moments/__init__.py | 11 +++++++++++ pandas/tests/window/test_numba.py | 9 +++++++++ pandas/tests/window/test_online.py | 9 +++++++++ pandas/util/_test_decorators.py | 7 +++++-- 6 files changed, 45 insertions(+), 8 deletions(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 486ef06a44a14..3d3119f10e3e7 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -26,12 +26,6 @@ fi PYTEST_CMD="${XVFB}pytest -v -r fEs -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" -if [[ $(uname) != "Linux" && $(uname) != "Darwin" ]]; then - # Windows can hang during pytest teardown step for window/moments tests - # Windows can crash for windows/numba tests - PYTEST_CMD="$PYTEST_CMD --ignore=pandas/tests/window/moments/ --ignore=pandas/tests/window/test_numba.py --ignore=pandas/tests/window/test_online.py --ignore=pandas/tests/plotting/" -fi - echo $PYTEST_CMD sh -c "$PYTEST_CMD" diff --git a/pandas/tests/plotting/__init__.py b/pandas/tests/plotting/__init__.py index e69de29bb2d1d..854d40c349c51 100644 --- a/pandas/tests/plotting/__init__.py +++ b/pandas/tests/plotting/__init__.py @@ -0,0 +1,11 @@ +import os + +import pytest + +from pandas.compat import is_platform_windows + +pytestmark = pytest.mark.skipif( + os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), + reason="Any test in this directory can hang on the multi-process " + "CI Windows environment", +) diff --git a/pandas/tests/window/moments/__init__.py b/pandas/tests/window/moments/__init__.py index e69de29bb2d1d..854d40c349c51 100644 --- a/pandas/tests/window/moments/__init__.py +++ b/pandas/tests/window/moments/__init__.py @@ -0,0 +1,11 @@ +import os + +import pytest + +from pandas.compat import is_platform_windows + +pytestmark = pytest.mark.skipif( + os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), + reason="Any test in this directory can hang on the multi-process " + "CI Windows environment", +) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index a14515ca9c018..2e19911df6444 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -1,6 +1,9 @@ +import os + import numpy as np import pytest +from pandas.compat import is_platform_windows from pandas.errors import NumbaUtilError import pandas.util._test_decorators as td @@ -13,6 +16,12 @@ import pandas._testing as tm from pandas.core.util.numba_ import NUMBA_FUNC_CACHE +pytestmark = pytest.mark.skipif( + os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), + reason="Any test in this file can hang on the multi-process " + "CI Windows environment", +) + @td.skip_if_no("numba") @pytest.mark.filterwarnings("ignore:\n") diff --git a/pandas/tests/window/test_online.py b/pandas/tests/window/test_online.py index 80cf1c55958ee..acadf9386b3f0 100644 --- a/pandas/tests/window/test_online.py +++ b/pandas/tests/window/test_online.py @@ -1,6 +1,9 @@ +import os + import numpy as np import pytest +from pandas.compat import is_platform_windows import pandas.util._test_decorators as td from pandas import ( @@ -9,6 +12,12 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.skipif( + os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), + reason="Any test in this file can hang on the multi-process " + "CI Windows environment", +) + @td.skip_if_no("numba") @pytest.mark.filterwarnings("ignore:\n") diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index e4c127b0ffda9..8f93298e78600 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -27,6 +27,7 @@ def test_foo(): from contextlib import contextmanager import locale +import os from typing import Callable import warnings @@ -260,8 +261,10 @@ def file_leak_context(): ContextManager analogue to check_file_leaks. """ psutil = safe_import("psutil") - if not psutil or is_platform_windows(): - # Windows grabs system files we're not interested in + if not psutil or ( + is_platform_windows() and os.environ.get("PANDAS_CI", "0") == "1" + ): + # Windows CI environments grab system files we're not interested in yield else: proc = psutil.Process() From 5e14aa8da7b7add92684d2c47154096a7d0ffe3b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 27 Jan 2022 17:04:48 -0800 Subject: [PATCH 32/81] What if i include my conftest discovery? --- pandas/conftest.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/pandas/conftest.py b/pandas/conftest.py index e61d9ee18cadb..952177f342c46 100644 --- a/pandas/conftest.py +++ b/pandas/conftest.py @@ -115,6 +115,12 @@ def pytest_collection_modifyitems(items, config): ] for item in items: + if config.getoption("--doctest-modules") or config.getoption( + "--doctest-cython", default=False + ): + # autouse=True for the add_doctest_imports can lead to expensive teardowns + # since doctest_namespace is a session fixture + item.add_marker(pytest.mark.usefixtures("add_doctest_imports")) # mark all tests in the pandas/tests/frame directory with "arraymanager" if "/frame/" in item.nodeid: item.add_marker(pytest.mark.arraymanager) @@ -187,6 +193,15 @@ def pytest_collection_modifyitems(items, config): ) +@pytest.fixture +def add_doctest_imports(doctest_namespace): + """ + Make `np` and `pd` names available for doctests. + """ + doctest_namespace["np"] = np + doctest_namespace["pd"] = pd + + # ---------------------------------------------------------------- # Autouse fixtures # ---------------------------------------------------------------- @@ -198,15 +213,6 @@ def configure_tests(): pd.set_option("chained_assignment", "raise") -@pytest.fixture(autouse=True) -def add_imports(doctest_namespace): - """ - Make `np` and `pd` names available for doctests. - """ - doctest_namespace["np"] = np - doctest_namespace["pd"] = pd - - # ---------------------------------------------------------------- # Common arguments # ---------------------------------------------------------------- From d610119327e233b2e52d46e675c81b5ea77aa53f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 27 Jan 2022 20:35:45 -0800 Subject: [PATCH 33/81] Don't autouse check_for_file_leaks --- pandas/tests/io/excel/conftest.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/excel/conftest.py b/pandas/tests/io/excel/conftest.py index 9a376975141cc..cbaef6c9f6b27 100644 --- a/pandas/tests/io/excel/conftest.py +++ b/pandas/tests/io/excel/conftest.py @@ -1,5 +1,6 @@ import pytest +from pandas.compat import is_platform_windows import pandas.util._test_decorators as td import pandas._testing as tm @@ -43,7 +44,8 @@ def read_ext(request): return request.param -@pytest.fixture(autouse=True) +# Checking for file leaks can hang on Windows CI +@pytest.fixture(autouse=not is_platform_windows()) def check_for_file_leaks(): """ Fixture to run around every test to ensure that we are not leaking files. From 9e2e7fabdfa1b644106c75980eb2c61056830b72 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 28 Jan 2022 09:36:04 -0800 Subject: [PATCH 34/81] Re-enable some test --- pandas/tests/plotting/__init__.py | 11 ----------- pandas/tests/window/moments/__init__.py | 11 ----------- 2 files changed, 22 deletions(-) diff --git a/pandas/tests/plotting/__init__.py b/pandas/tests/plotting/__init__.py index 854d40c349c51..e69de29bb2d1d 100644 --- a/pandas/tests/plotting/__init__.py +++ b/pandas/tests/plotting/__init__.py @@ -1,11 +0,0 @@ -import os - -import pytest - -from pandas.compat import is_platform_windows - -pytestmark = pytest.mark.skipif( - os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), - reason="Any test in this directory can hang on the multi-process " - "CI Windows environment", -) diff --git a/pandas/tests/window/moments/__init__.py b/pandas/tests/window/moments/__init__.py index 854d40c349c51..e69de29bb2d1d 100644 --- a/pandas/tests/window/moments/__init__.py +++ b/pandas/tests/window/moments/__init__.py @@ -1,11 +0,0 @@ -import os - -import pytest - -from pandas.compat import is_platform_windows - -pytestmark = pytest.mark.skipif( - os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), - reason="Any test in this directory can hang on the multi-process " - "CI Windows environment", -) From 28cc8ae75cb3ade97f434e9dcc91b7322e16b85a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 28 Jan 2022 09:36:40 -0800 Subject: [PATCH 35/81] Up to 2 hours for windows --- ci/azure/windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index 3b489d92388da..9e464722322ca 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -4,7 +4,7 @@ parameters: jobs: - job: ${{ parameters.name }} - timeoutInMinutes: 90 + timeoutInMinutes: 120 pool: vmImage: ${{ parameters.vmImage }} strategy: From 7481bed12b199169e3e7f9f1d7e5f494bf6c37a0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 28 Jan 2022 11:53:24 -0800 Subject: [PATCH 36/81] Enable more tests --- pandas/tests/io/excel/test_readers.py | 7 ------- pandas/tests/io/excel/test_writers.py | 7 ------- 2 files changed, 14 deletions(-) diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index ee5cf7270d564..c7ddac8f03fb4 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -11,7 +11,6 @@ import numpy as np import pytest -from pandas.compat import is_platform_windows import pandas.util._test_decorators as td import pandas as pd @@ -25,12 +24,6 @@ from pandas.tests.io.excel import xlrd_version from pandas.util.version import Version -pytestmark = pytest.mark.skipif( - os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), - reason="Any test in this file can hang on the multi-process " - "CI Windows environment", -) - read_ext_params = [".xls", ".xlsx", ".xlsm", ".xlsb", ".ods"] engine_params = [ # Add any engines to test here diff --git a/pandas/tests/io/excel/test_writers.py b/pandas/tests/io/excel/test_writers.py index 70cce7108eb6e..6f06ef9c09e52 100644 --- a/pandas/tests/io/excel/test_writers.py +++ b/pandas/tests/io/excel/test_writers.py @@ -11,7 +11,6 @@ import numpy as np import pytest -from pandas.compat import is_platform_windows import pandas.util._test_decorators as td import pandas as pd @@ -32,12 +31,6 @@ register_writer, ) -pytestmark = pytest.mark.skipif( - os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), - reason="Any test in this file can hang on the multi-process " - "CI Windows environment", -) - @pytest.fixture def path(ext): From c58bb53869e4b9e0d62713f17a3d9b120dd746c3 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 28 Jan 2022 13:38:15 -0800 Subject: [PATCH 37/81] Now try numba --- pandas/tests/window/test_numba.py | 9 --------- pandas/tests/window/test_online.py | 9 --------- 2 files changed, 18 deletions(-) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 642fef16eb4ba..a9e38751c9a2f 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -1,9 +1,6 @@ -import os - import numpy as np import pytest -from pandas.compat import is_platform_windows from pandas.errors import NumbaUtilError import pandas.util._test_decorators as td @@ -16,12 +13,6 @@ import pandas._testing as tm from pandas.core.util.numba_ import NUMBA_FUNC_CACHE -pytestmark = pytest.mark.skipif( - os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), - reason="Any test in this file can hang on the multi-process " - "CI Windows environment", -) - @pytest.fixture(params=["single", "table"]) def method(request): diff --git a/pandas/tests/window/test_online.py b/pandas/tests/window/test_online.py index acadf9386b3f0..80cf1c55958ee 100644 --- a/pandas/tests/window/test_online.py +++ b/pandas/tests/window/test_online.py @@ -1,9 +1,6 @@ -import os - import numpy as np import pytest -from pandas.compat import is_platform_windows import pandas.util._test_decorators as td from pandas import ( @@ -12,12 +9,6 @@ ) import pandas._testing as tm -pytestmark = pytest.mark.skipif( - os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), - reason="Any test in this file can hang on the multi-process " - "CI Windows environment", -) - @td.skip_if_no("numba") @pytest.mark.filterwarnings("ignore:\n") From cca379e61e1e3c0818f35fb902af4abb1bf46eee Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 28 Jan 2022 15:49:47 -0800 Subject: [PATCH 38/81] Revert "Now try numba" This reverts commit c58bb53869e4b9e0d62713f17a3d9b120dd746c3. --- pandas/tests/window/test_numba.py | 9 +++++++++ pandas/tests/window/test_online.py | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index a9e38751c9a2f..642fef16eb4ba 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -1,6 +1,9 @@ +import os + import numpy as np import pytest +from pandas.compat import is_platform_windows from pandas.errors import NumbaUtilError import pandas.util._test_decorators as td @@ -13,6 +16,12 @@ import pandas._testing as tm from pandas.core.util.numba_ import NUMBA_FUNC_CACHE +pytestmark = pytest.mark.skipif( + os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), + reason="Any test in this file can hang on the multi-process " + "CI Windows environment", +) + @pytest.fixture(params=["single", "table"]) def method(request): diff --git a/pandas/tests/window/test_online.py b/pandas/tests/window/test_online.py index 80cf1c55958ee..acadf9386b3f0 100644 --- a/pandas/tests/window/test_online.py +++ b/pandas/tests/window/test_online.py @@ -1,6 +1,9 @@ +import os + import numpy as np import pytest +from pandas.compat import is_platform_windows import pandas.util._test_decorators as td from pandas import ( @@ -9,6 +12,12 @@ ) import pandas._testing as tm +pytestmark = pytest.mark.skipif( + os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), + reason="Any test in this file can hang on the multi-process " + "CI Windows environment", +) + @td.skip_if_no("numba") @pytest.mark.filterwarnings("ignore:\n") From d47b04a208fb542329fae1a62b02aaac064cd7c6 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 28 Jan 2022 15:51:50 -0800 Subject: [PATCH 39/81] It was numba afterall --- pandas/tests/window/test_numba.py | 4 ++-- pandas/tests/window/test_online.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 642fef16eb4ba..30c9d9abee9c7 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -18,8 +18,8 @@ pytestmark = pytest.mark.skipif( os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), - reason="Any test in this file can hang on the multi-process " - "CI Windows environment", + reason="In a multi-process Windows CI environment can lead to " + "'Windows fatal exception: stack overflow'", ) diff --git a/pandas/tests/window/test_online.py b/pandas/tests/window/test_online.py index acadf9386b3f0..58fec100ee404 100644 --- a/pandas/tests/window/test_online.py +++ b/pandas/tests/window/test_online.py @@ -14,8 +14,8 @@ pytestmark = pytest.mark.skipif( os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), - reason="Any test in this file can hang on the multi-process " - "CI Windows environment", + reason="In a multi-process Windows CI environment can lead to " + "'Windows fatal exception: stack overflow'", ) From d7f75139a142124eb6353aa1081e1245c9b3fc90 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 28 Jan 2022 17:54:08 -0800 Subject: [PATCH 40/81] Numba flaky on MacOS too --- pandas/tests/window/test_numba.py | 13 +++++++++---- pandas/tests/window/test_online.py | 13 +++++++++---- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 30c9d9abee9c7..1696975d33b7b 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -3,7 +3,10 @@ import numpy as np import pytest -from pandas.compat import is_platform_windows +from pandas.compat import ( + is_platform_mac, + is_platform_windows, +) from pandas.errors import NumbaUtilError import pandas.util._test_decorators as td @@ -17,9 +20,11 @@ from pandas.core.util.numba_ import NUMBA_FUNC_CACHE pytestmark = pytest.mark.skipif( - os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), - reason="In a multi-process Windows CI environment can lead to " - "'Windows fatal exception: stack overflow'", + os.environ.get("PANDAS_CI", "0") == "1" + and (is_platform_windows() or is_platform_mac()), + reason="On Azure CI, Windows can fail with " + "'Windows fatal exception: stack overflow' " + "and MacOS can timeout", ) diff --git a/pandas/tests/window/test_online.py b/pandas/tests/window/test_online.py index 58fec100ee404..707a389f6bb1b 100644 --- a/pandas/tests/window/test_online.py +++ b/pandas/tests/window/test_online.py @@ -3,7 +3,10 @@ import numpy as np import pytest -from pandas.compat import is_platform_windows +from pandas.compat import ( + is_platform_mac, + is_platform_windows, +) import pandas.util._test_decorators as td from pandas import ( @@ -13,9 +16,11 @@ import pandas._testing as tm pytestmark = pytest.mark.skipif( - os.environ.get("PANDAS_CI", "0") == "1" and is_platform_windows(), - reason="In a multi-process Windows CI environment can lead to " - "'Windows fatal exception: stack overflow'", + os.environ.get("PANDAS_CI", "0") == "1" + and (is_platform_windows() or is_platform_mac()), + reason="On Azure CI, Windows can fail with " + "'Windows fatal exception: stack overflow' " + "and MacOS can timeout", ) From d9d3e86fb5c012bd758db12319ba7b253adbf1bf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 28 Jan 2022 20:29:24 -0800 Subject: [PATCH 41/81] Limit pyarrow to 1 thread --- pandas/tests/io/parser/conftest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index 2070057aff10b..b6ebf30b9b6cb 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -108,10 +108,11 @@ def all_parsers(request): parser = request.param() if parser.engine == "pyarrow": pytest.importorskip("pyarrow", VERSIONS["pyarrow"]) - # Try setting num cpus to 1 to avoid hangs? + # Try setting num cpus to 1 to avoid hangs in CI import pyarrow pyarrow.set_cpu_count(1) + pyarrow.set_io_thread_count(1) return parser From 22773a7576f53666263c69133eea1a7bb5822eb0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 28 Jan 2022 20:31:56 -0800 Subject: [PATCH 42/81] Simplify condition --- pandas/util/_test_decorators.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/util/_test_decorators.py b/pandas/util/_test_decorators.py index 8f93298e78600..862215d3bcfe0 100644 --- a/pandas/util/_test_decorators.py +++ b/pandas/util/_test_decorators.py @@ -27,7 +27,6 @@ def test_foo(): from contextlib import contextmanager import locale -import os from typing import Callable import warnings @@ -261,9 +260,7 @@ def file_leak_context(): ContextManager analogue to check_file_leaks. """ psutil = safe_import("psutil") - if not psutil or ( - is_platform_windows() and os.environ.get("PANDAS_CI", "0") == "1" - ): + if not psutil or is_platform_windows(): # Windows CI environments grab system files we're not interested in yield else: From 87443c6f2c6a810c21e39e2b941bb5b8c9b93b99 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 28 Jan 2022 20:32:25 -0800 Subject: [PATCH 43/81] Use 90 mins --- ci/azure/windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure/windows.yml b/ci/azure/windows.yml index 9e464722322ca..3b489d92388da 100644 --- a/ci/azure/windows.yml +++ b/ci/azure/windows.yml @@ -4,7 +4,7 @@ parameters: jobs: - job: ${{ parameters.name }} - timeoutInMinutes: 120 + timeoutInMinutes: 90 pool: vmImage: ${{ parameters.vmImage }} strategy: From 6aa0a11bf6cfae5a509bcce4ed9f8690ae330f14 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 28 Jan 2022 21:14:37 -0800 Subject: [PATCH 44/81] Add comment about marking single --- pandas/tests/io/parser/conftest.py | 2 ++ pandas/tests/window/test_numba.py | 1 + pandas/tests/window/test_online.py | 1 + 3 files changed, 4 insertions(+) diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index b6ebf30b9b6cb..b787d1545ff80 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -109,6 +109,8 @@ def all_parsers(request): if parser.engine == "pyarrow": pytest.importorskip("pyarrow", VERSIONS["pyarrow"]) # Try setting num cpus to 1 to avoid hangs in CI + # TODO: Mark these as pytest.mark.single GH 44584 + # or better somehow pass use_thread=False into pyarrow import pyarrow pyarrow.set_cpu_count(1) diff --git a/pandas/tests/window/test_numba.py b/pandas/tests/window/test_numba.py index 1696975d33b7b..36625d86873f6 100644 --- a/pandas/tests/window/test_numba.py +++ b/pandas/tests/window/test_numba.py @@ -19,6 +19,7 @@ import pandas._testing as tm from pandas.core.util.numba_ import NUMBA_FUNC_CACHE +# TODO: Mark these as pytest.mark.single GH 44584 pytestmark = pytest.mark.skipif( os.environ.get("PANDAS_CI", "0") == "1" and (is_platform_windows() or is_platform_mac()), diff --git a/pandas/tests/window/test_online.py b/pandas/tests/window/test_online.py index 707a389f6bb1b..debf87bf6ade1 100644 --- a/pandas/tests/window/test_online.py +++ b/pandas/tests/window/test_online.py @@ -15,6 +15,7 @@ ) import pandas._testing as tm +# TODO: Mark these as pytest.mark.single GH 44584 pytestmark = pytest.mark.skipif( os.environ.get("PANDAS_CI", "0") == "1" and (is_platform_windows() or is_platform_mac()), From f636228fbd69f5c249ae2820608c452c0a9cb0a4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 28 Jan 2022 21:26:25 -0800 Subject: [PATCH 45/81] Set a different way since not available in py38 --- .github/workflows/datamanger.yml | 3 +++ .github/workflows/posix.yml | 3 +++ azure-pipelines.yml | 3 +++ pandas/tests/io/parser/conftest.py | 1 - 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/datamanger.yml b/.github/workflows/datamanger.yml index 749e369d164e9..f65f6911ef89f 100644 --- a/.github/workflows/datamanger.yml +++ b/.github/workflows/datamanger.yml @@ -15,6 +15,9 @@ on: env: ENV_FILE: environment.yml PANDAS_CI: 1 + # Avoid hanging threads in pyarrow + OMP_NUM_THREADS: 1 + OMP_THREAD_LIMIT: 1 jobs: data_manager: diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index a83ca5c375c3a..3a0b4fbadd0e5 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -15,6 +15,9 @@ on: env: PYTEST_WORKERS: "auto" PANDAS_CI: 1 + # Avoid hanging threads in pyarrow + OMP_NUM_THREADS: 1 + OMP_THREAD_LIMIT: 1 jobs: pytest: diff --git a/azure-pipelines.yml b/azure-pipelines.yml index f597f7dd4f672..4ebde302ee48d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,6 +20,9 @@ variables: PYTEST_TARGET: pandas PATTERN: "not slow and not high_memory and not db" PANDAS_CI: 1 + # Avoid hanging threads in pyarrow + OMP_NUM_THREADS: 1 + OMP_THREAD_LIMIT: 1 jobs: - template: ci/azure/posix.yml diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index b787d1545ff80..5df8eb5dff460 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -114,7 +114,6 @@ def all_parsers(request): import pyarrow pyarrow.set_cpu_count(1) - pyarrow.set_io_thread_count(1) return parser From 7e0662dc08ad2d87a55a189a35da004888b55a90 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 28 Jan 2022 23:06:09 -0800 Subject: [PATCH 46/81] Declare victory --- .github/workflows/code-checks.yml | 3 --- .github/workflows/datamanger.yml | 1 - .github/workflows/docbuild-and-upload.yml | 1 - .github/workflows/posix.yml | 1 - .github/workflows/sdist.yml | 1 - ci/run_tests.sh | 2 +- 6 files changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/code-checks.yml b/.github/workflows/code-checks.yml index 679246673330c..8a74ae14404b3 100644 --- a/.github/workflows/code-checks.yml +++ b/.github/workflows/code-checks.yml @@ -16,7 +16,6 @@ env: jobs: pre_commit: - if: false name: pre-commit runs-on: ubuntu-latest concurrency: @@ -36,7 +35,6 @@ jobs: uses: pre-commit/action@v2.0.3 typing_and_docstring_validation: - if: false name: Docstring and typing validation runs-on: ubuntu-latest defaults: @@ -103,7 +101,6 @@ jobs: if: ${{ steps.build.outcome == 'success' }} asv-benchmarks: - if: false name: ASV Benchmarks runs-on: ubuntu-latest defaults: diff --git a/.github/workflows/datamanger.yml b/.github/workflows/datamanger.yml index f65f6911ef89f..889b225c0c69d 100644 --- a/.github/workflows/datamanger.yml +++ b/.github/workflows/datamanger.yml @@ -21,7 +21,6 @@ env: jobs: data_manager: - if: false name: Test experimental data manager runs-on: ubuntu-latest services: diff --git a/.github/workflows/docbuild-and-upload.yml b/.github/workflows/docbuild-and-upload.yml index c15c217ddbdab..4cce75779d750 100644 --- a/.github/workflows/docbuild-and-upload.yml +++ b/.github/workflows/docbuild-and-upload.yml @@ -16,7 +16,6 @@ env: jobs: web_and_docs: - if: false name: Doc Build and Upload runs-on: ubuntu-latest diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 3a0b4fbadd0e5..08b891e2c58b6 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -21,7 +21,6 @@ env: jobs: pytest: - if: false runs-on: ubuntu-latest defaults: run: diff --git a/.github/workflows/sdist.yml b/.github/workflows/sdist.yml index ca2e329436ae4..dd030f1aacc44 100644 --- a/.github/workflows/sdist.yml +++ b/.github/workflows/sdist.yml @@ -14,7 +14,6 @@ on: jobs: build: - if: false runs-on: ubuntu-latest timeout-minutes: 60 defaults: diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 3d3119f10e3e7..f558898976cd9 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -24,7 +24,7 @@ if [[ $(uname) == "Linux" && -z $DISPLAY ]]; then XVFB="xvfb-run " fi -PYTEST_CMD="${XVFB}pytest -v -r fEs -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" +PYTEST_CMD="${XVFB}pytest -r fEs -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" echo $PYTEST_CMD sh -c "$PYTEST_CMD" From 24074b6ef801d7fabf6ebd4c4c0557ada51e2524 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 29 Jan 2022 11:22:52 -0800 Subject: [PATCH 47/81] Monkeypatch to disabble threads --- pandas/io/parsers/arrow_parser_wrapper.py | 2 +- pandas/tests/io/parser/conftest.py | 23 ++++++++++++++++++----- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 618da9d33c490..512b21b02c2eb 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -145,7 +145,7 @@ def read(self) -> DataFrame: """ pyarrow_csv = import_optional_dependency("pyarrow.csv") self._get_pyarrow_options() - + print(self.read_options) table = pyarrow_csv.read_csv( self.src, read_options=pyarrow_csv.ReadOptions(**self.read_options), diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index 5df8eb5dff460..e554d6a8cbb62 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -12,6 +12,9 @@ ) import pandas._testing as tm +from pandas.io.parsers import readers +from pandas.io.parsers.arrow_parser_wrapper import ArrowParserWrapper + class BaseParser: engine: str | None = None @@ -101,20 +104,30 @@ def csv1(datapath): @pytest.fixture(params=_all_parsers, ids=_all_parser_ids) -def all_parsers(request): +def all_parsers(request, monkeypatch): """ Fixture all of the CSV parsers. """ parser = request.param() if parser.engine == "pyarrow": pytest.importorskip("pyarrow", VERSIONS["pyarrow"]) - # Try setting num cpus to 1 to avoid hangs in CI - # TODO: Mark these as pytest.mark.single GH 44584 - # or better somehow pass use_thread=False into pyarrow + import pyarrow + # TODO: Probably mark these as pytest.mark.single GH 44584 + # Disable threads in CI environment to avoid timeouts pyarrow.set_cpu_count(1) - return parser + + class NoThreadArrowParserWrapper(ArrowParserWrapper): + def _get_pyarrow_options(self): + super()._get_pyarrow_options() + self.read_options["use_threads"] = False + + with monkeypatch.context() as m: + m.setattr(readers, "ArrowParserWrapper", NoThreadArrowParserWrapper) + yield parser + else: + yield parser @pytest.fixture(params=_c_parsers_only, ids=_c_parser_ids) From 4aaeb8d03e761b922102a41033bdfc7f2564fcdf Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 29 Jan 2022 11:24:38 -0800 Subject: [PATCH 48/81] Remove print --- pandas/io/parsers/arrow_parser_wrapper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index 512b21b02c2eb..ef1c7a1db36d0 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -145,7 +145,6 @@ def read(self) -> DataFrame: """ pyarrow_csv = import_optional_dependency("pyarrow.csv") self._get_pyarrow_options() - print(self.read_options) table = pyarrow_csv.read_csv( self.src, read_options=pyarrow_csv.ReadOptions(**self.read_options), From 59afb3860bd4b53be390722ded535aeb2d0f4bd9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 30 Jan 2022 11:21:28 -0800 Subject: [PATCH 49/81] undo whitespace --- pandas/io/parsers/arrow_parser_wrapper.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/io/parsers/arrow_parser_wrapper.py b/pandas/io/parsers/arrow_parser_wrapper.py index ef1c7a1db36d0..618da9d33c490 100644 --- a/pandas/io/parsers/arrow_parser_wrapper.py +++ b/pandas/io/parsers/arrow_parser_wrapper.py @@ -145,6 +145,7 @@ def read(self) -> DataFrame: """ pyarrow_csv = import_optional_dependency("pyarrow.csv") self._get_pyarrow_options() + table = pyarrow_csv.read_csv( self.src, read_options=pyarrow_csv.ReadOptions(**self.read_options), From e094c709dcfe7b16ea96463c32e9340198ed4d73 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 30 Jan 2022 11:23:06 -0800 Subject: [PATCH 50/81] Undo leftover test mods --- pandas/tests/io/excel/conftest.py | 14 +++----------- pandas/tests/io/excel/test_readers.py | 21 ++++++++++++--------- pandas/tests/io/parser/conftest.py | 6 ++---- 3 files changed, 17 insertions(+), 24 deletions(-) diff --git a/pandas/tests/io/excel/conftest.py b/pandas/tests/io/excel/conftest.py index cbaef6c9f6b27..0455e0d61ad97 100644 --- a/pandas/tests/io/excel/conftest.py +++ b/pandas/tests/io/excel/conftest.py @@ -1,6 +1,5 @@ import pytest -from pandas.compat import is_platform_windows import pandas.util._test_decorators as td import pandas._testing as tm @@ -44,8 +43,7 @@ def read_ext(request): return request.param -# Checking for file leaks can hang on Windows CI -@pytest.fixture(autouse=not is_platform_windows()) +@pytest.fixture(autouse=True) def check_for_file_leaks(): """ Fixture to run around every test to ensure that we are not leaking files. @@ -60,14 +58,8 @@ def check_for_file_leaks(): yield else: - # Only care about excel files in this conftest - exts = [".xls", ".xlsx", ".xlsm", ".ods", ".xlsb"] proc = psutil.Process() - flist = [ - f for f in proc.open_files() if any(f.path.endswith(ext) for ext in exts) - ] + flist = proc.open_files() yield - flist2 = [ - f for f in proc.open_files() if any(f.path.endswith(ext) for ext in exts) - ] + flist2 = proc.open_files() assert flist == flist2 diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index ff299deb5b9fa..589c98721f139 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -131,6 +131,8 @@ def test_engine_used(self, read_ext, engine, monkeypatch): def parser(self, *args, **kwargs): return self.engine + monkeypatch.setattr(pd.ExcelFile, "parse", parser) + expected_defaults = { "xlsx": "openpyxl", "xlsm": "openpyxl", @@ -138,10 +140,9 @@ def parser(self, *args, **kwargs): "xls": "xlrd", "ods": "odf", } - with monkeypatch.context() as m: - m.setattr(pd.ExcelFile, "parse", parser) - with open("test1" + read_ext, "rb") as f: - result = pd.read_excel(f) + + with open("test1" + read_ext, "rb") as f: + result = pd.read_excel(f) if engine is not None: expected = engine @@ -149,7 +150,9 @@ def parser(self, *args, **kwargs): expected = expected_defaults[read_ext[1:]] assert result == expected - def test_usecols_int(self, read_ext): + def test_usecols_int(self, read_ext, df_ref): + df_ref = df_ref.reindex(columns=["A", "B", "C"]) + # usecols as int msg = "Passing an integer for `usecols`" with pytest.raises(ValueError, match=msg): @@ -718,7 +721,7 @@ def test_excel_read_buffer(self, read_ext): actual = pd.read_excel(f, sheet_name="Sheet1", index_col=0) tm.assert_frame_equal(expected, actual) - def test_bad_engine_raises(self): + def test_bad_engine_raises(self, read_ext): bad_engine = "foo" with pytest.raises(ValueError, match="Unknown engine: foo"): pd.read_excel("", engine=bad_engine) @@ -740,7 +743,7 @@ def test_missing_file_raises(self, read_ext): with pytest.raises(FileNotFoundError, match=match): pd.read_excel(bad_file) - def test_corrupt_bytes_raises(self, engine): + def test_corrupt_bytes_raises(self, read_ext, engine): bad_stream = b"foo" if engine is None: error = ValueError @@ -1284,7 +1287,7 @@ def test_ignore_chartsheets_by_int(self, request, engine, read_ext): ): pd.read_excel("chartsheet" + read_ext, sheet_name=1) - def test_euro_decimal_format(self, read_ext): + def test_euro_decimal_format(self, request, read_ext): # copied from read_csv result = pd.read_excel("test_decimal" + read_ext, decimal=",", skiprows=1) expected = DataFrame( @@ -1308,7 +1311,7 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch): monkeypatch.chdir(datapath("io", "data", "excel")) monkeypatch.setattr(pd, "ExcelFile", func) - def test_engine_used(self, read_ext, engine): + def test_engine_used(self, read_ext, engine, monkeypatch): expected_defaults = { "xlsx": "openpyxl", "xlsm": "openpyxl", diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index 433712d998eaa..2070057aff10b 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -101,16 +101,14 @@ def csv1(datapath): @pytest.fixture(params=_all_parsers, ids=_all_parser_ids) -def all_parsers(request, monkeypatch): +def all_parsers(request): """ Fixture all of the CSV parsers. """ parser = request.param() if parser.engine == "pyarrow": pytest.importorskip("pyarrow", VERSIONS["pyarrow"]) - # Try setting num cpus to 1 to avoid hangs on Azure MacOS/Windows builds - # or better yet find a way to disable threads - # TODO(GH#44584) pytest.mark.single these tests + # Try setting num cpus to 1 to avoid hangs? import pyarrow pyarrow.set_cpu_count(1) From 6159811e4cb001f910acef40a2f59c46e51d21d0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 30 Jan 2022 11:24:37 -0800 Subject: [PATCH 51/81] My local main wasn't updated --- pandas/tests/io/excel/conftest.py | 4 +++- pandas/tests/io/excel/test_readers.py | 12 +++++------- pandas/tests/io/parser/conftest.py | 4 +++- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/pandas/tests/io/excel/conftest.py b/pandas/tests/io/excel/conftest.py index 0455e0d61ad97..4ce06c01892d9 100644 --- a/pandas/tests/io/excel/conftest.py +++ b/pandas/tests/io/excel/conftest.py @@ -1,5 +1,6 @@ import pytest +from pandas.compat import is_platform_windows import pandas.util._test_decorators as td import pandas._testing as tm @@ -43,7 +44,8 @@ def read_ext(request): return request.param -@pytest.fixture(autouse=True) +# Checking for file leaks can hang on Windows CI +@pytest.fixture(autouse=not is_platform_windows()) def check_for_file_leaks(): """ Fixture to run around every test to ensure that we are not leaking files. diff --git a/pandas/tests/io/excel/test_readers.py b/pandas/tests/io/excel/test_readers.py index 589c98721f139..2af8b1ab31403 100644 --- a/pandas/tests/io/excel/test_readers.py +++ b/pandas/tests/io/excel/test_readers.py @@ -150,9 +150,7 @@ def parser(self, *args, **kwargs): expected = expected_defaults[read_ext[1:]] assert result == expected - def test_usecols_int(self, read_ext, df_ref): - df_ref = df_ref.reindex(columns=["A", "B", "C"]) - + def test_usecols_int(self, read_ext): # usecols as int msg = "Passing an integer for `usecols`" with pytest.raises(ValueError, match=msg): @@ -721,7 +719,7 @@ def test_excel_read_buffer(self, read_ext): actual = pd.read_excel(f, sheet_name="Sheet1", index_col=0) tm.assert_frame_equal(expected, actual) - def test_bad_engine_raises(self, read_ext): + def test_bad_engine_raises(self): bad_engine = "foo" with pytest.raises(ValueError, match="Unknown engine: foo"): pd.read_excel("", engine=bad_engine) @@ -743,7 +741,7 @@ def test_missing_file_raises(self, read_ext): with pytest.raises(FileNotFoundError, match=match): pd.read_excel(bad_file) - def test_corrupt_bytes_raises(self, read_ext, engine): + def test_corrupt_bytes_raises(self, engine): bad_stream = b"foo" if engine is None: error = ValueError @@ -1287,7 +1285,7 @@ def test_ignore_chartsheets_by_int(self, request, engine, read_ext): ): pd.read_excel("chartsheet" + read_ext, sheet_name=1) - def test_euro_decimal_format(self, request, read_ext): + def test_euro_decimal_format(self, read_ext): # copied from read_csv result = pd.read_excel("test_decimal" + read_ext, decimal=",", skiprows=1) expected = DataFrame( @@ -1311,7 +1309,7 @@ def cd_and_set_engine(self, engine, datapath, monkeypatch): monkeypatch.chdir(datapath("io", "data", "excel")) monkeypatch.setattr(pd, "ExcelFile", func) - def test_engine_used(self, read_ext, engine, monkeypatch): + def test_engine_used(self, read_ext, engine): expected_defaults = { "xlsx": "openpyxl", "xlsm": "openpyxl", diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index 2070057aff10b..b2d2be362d0d3 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -108,7 +108,9 @@ def all_parsers(request): parser = request.param() if parser.engine == "pyarrow": pytest.importorskip("pyarrow", VERSIONS["pyarrow"]) - # Try setting num cpus to 1 to avoid hangs? + # Try setting num cpus to 1 to avoid hangs on Azure MacOS/Windows builds + # or better yet find a way to disable threads + # TODO(GH#44584) pytest.mark.single these tests import pyarrow pyarrow.set_cpu_count(1) From 64028867d8e85ba78ed9273ae4dae6cccab696cc Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 30 Jan 2022 18:08:00 -0800 Subject: [PATCH 52/81] Live with mulithead pyarrow --- .github/workflows/datamanger.yml | 3 --- .github/workflows/posix.yml | 3 --- azure-pipelines.yml | 3 --- 3 files changed, 9 deletions(-) diff --git a/.github/workflows/datamanger.yml b/.github/workflows/datamanger.yml index 889b225c0c69d..368c770ad5f14 100644 --- a/.github/workflows/datamanger.yml +++ b/.github/workflows/datamanger.yml @@ -15,9 +15,6 @@ on: env: ENV_FILE: environment.yml PANDAS_CI: 1 - # Avoid hanging threads in pyarrow - OMP_NUM_THREADS: 1 - OMP_THREAD_LIMIT: 1 jobs: data_manager: diff --git a/.github/workflows/posix.yml b/.github/workflows/posix.yml index 08b891e2c58b6..8080a81519d8f 100644 --- a/.github/workflows/posix.yml +++ b/.github/workflows/posix.yml @@ -15,9 +15,6 @@ on: env: PYTEST_WORKERS: "auto" PANDAS_CI: 1 - # Avoid hanging threads in pyarrow - OMP_NUM_THREADS: 1 - OMP_THREAD_LIMIT: 1 jobs: pytest: diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 4ebde302ee48d..f597f7dd4f672 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -20,9 +20,6 @@ variables: PYTEST_TARGET: pandas PATTERN: "not slow and not high_memory and not db" PANDAS_CI: 1 - # Avoid hanging threads in pyarrow - OMP_NUM_THREADS: 1 - OMP_THREAD_LIMIT: 1 jobs: - template: ci/azure/posix.yml From b09445844c4e259e499fc0fbe5d539007cc04921 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 30 Jan 2022 22:09:12 -0800 Subject: [PATCH 53/81] Try changing macos image for pyarrow parquet issues --- azure-pipelines.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index f597f7dd4f672..68b826df8626d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -25,7 +25,9 @@ jobs: - template: ci/azure/posix.yml parameters: name: macOS - vmImage: macOS-latest + # pyarrow-parquet issues when using macOS-latest (macOS-11 as of writing) + # Library not loaded: @rpath/libssl.1.1.dylib + vmImage: macOS-10.15 - template: ci/azure/windows.yml parameters: From 4945a1d7e86e3484091ab51c9ee8199f6a81a302 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 30 Jan 2022 22:57:42 -0800 Subject: [PATCH 54/81] Revert "Try changing macos image for pyarrow parquet issues" This reverts commit b09445844c4e259e499fc0fbe5d539007cc04921. --- azure-pipelines.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 68b826df8626d..f597f7dd4f672 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -25,9 +25,7 @@ jobs: - template: ci/azure/posix.yml parameters: name: macOS - # pyarrow-parquet issues when using macOS-latest (macOS-11 as of writing) - # Library not loaded: @rpath/libssl.1.1.dylib - vmImage: macOS-10.15 + vmImage: macOS-latest - template: ci/azure/windows.yml parameters: From a57e6506d873d50751682a5f29ec3f4716054bb9 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 30 Jan 2022 23:24:14 -0800 Subject: [PATCH 55/81] Try this way --- ci/azure/posix.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index be76e1366f5ca..172f477aca55e 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -25,7 +25,11 @@ jobs: - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin' displayName: 'Set conda path' - - script: ci/setup_env.sh + # Install openssl for pyarrow-parquet error + # Library not loaded: @rpath/libssl.1.1.dylib + - script: | + ci/setup_env.sh + conda install -c anaconda openssl displayName: 'Setup environment and build pandas' - script: | From 5221d14ffcb933be162b69aedd26862dc23a886c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 31 Jan 2022 08:27:13 -0800 Subject: [PATCH 56/81] Reinstall pyarrow? --- ci/azure/posix.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 172f477aca55e..e71e95954571b 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -25,11 +25,12 @@ jobs: - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin' displayName: 'Set conda path' - # Install openssl for pyarrow-parquet error + # Re-install pyarrow for occasional pyarrow-parquet error # Library not loaded: @rpath/libssl.1.1.dylib - script: | ci/setup_env.sh - conda install -c anaconda openssl + conda remove pyarrow --yes + conda install -c conda-forge pyarrow displayName: 'Setup environment and build pandas' - script: | From 24eded660094030fa89d7edb081ff23e78f6649c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 31 Jan 2022 13:31:40 -0800 Subject: [PATCH 57/81] Don't run network as well --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index f597f7dd4f672..21cb5bac25be8 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -18,7 +18,7 @@ pr: variables: PYTEST_WORKERS: auto PYTEST_TARGET: pandas - PATTERN: "not slow and not high_memory and not db" + PATTERN: "not slow and not high_memory and not db and not network" PANDAS_CI: 1 jobs: From cb4a92e933a2b0e0426415ae77bdee848e908f58 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 31 Jan 2022 13:36:39 -0800 Subject: [PATCH 58/81] try pyarrow 6 --- ci/azure/posix.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index e71e95954571b..906fa54c84927 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -30,7 +30,7 @@ jobs: - script: | ci/setup_env.sh conda remove pyarrow --yes - conda install -c conda-forge pyarrow + conda install -c conda-forge pyarrow=6 displayName: 'Setup environment and build pandas' - script: | From 5dd03ba1c15e3aaffc1b6c9feaa1dedb4231066a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 31 Jan 2022 13:37:02 -0800 Subject: [PATCH 59/81] Need to view again --- ci/run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index f558898976cd9..3d3119f10e3e7 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -24,7 +24,7 @@ if [[ $(uname) == "Linux" && -z $DISPLAY ]]; then XVFB="xvfb-run " fi -PYTEST_CMD="${XVFB}pytest -r fEs -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" +PYTEST_CMD="${XVFB}pytest -v -r fEs -m \"$PATTERN\" -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" echo $PYTEST_CMD sh -c "$PYTEST_CMD" From 67b5d490d7b697abdd21bd70513339fce7aebfce Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 31 Jan 2022 19:26:40 -0800 Subject: [PATCH 60/81] Xfail for flaky macos pyarrow parquet thing --- ci/azure/posix.yml | 5 +---- pandas/tests/io/test_common.py | 8 +++++++- pandas/tests/io/test_fsspec.py | 7 +++++++ pandas/tests/io/test_parquet.py | 35 ++++++++++++++++++++++++++++++++- 4 files changed, 49 insertions(+), 6 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 906fa54c84927..71cb62117b0e3 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -27,10 +27,7 @@ jobs: # Re-install pyarrow for occasional pyarrow-parquet error # Library not loaded: @rpath/libssl.1.1.dylib - - script: | - ci/setup_env.sh - conda remove pyarrow --yes - conda install -c conda-forge pyarrow=6 + - script: ci/setup_env.sh displayName: 'Setup environment and build pandas' - script: | diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 7b7918a323c99..1fd540aba5c07 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -236,9 +236,15 @@ def test_write_missing_parent_directory(self, method, module, error_class, fn_ex path = os.path.join(HERE, "data", "missing_folder", "does_not_exist." + fn_ext) + msg = r"|".join( + [ + r"[Errno 2] No such", + r"Cannot save file into a non-existent directory: .*missing_folder", + ] + ) with pytest.raises( error_class, - match=r"Cannot save file into a non-existent directory: .*missing_folder", + match=msg, ): method(dummy_frame, path) diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index f1040c0bd30f2..c2c8db4cd7160 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -3,6 +3,7 @@ import numpy as np import pytest +from pandas.compat import is_platform_mac from pandas.compat._optional import VERSIONS from pandas import ( @@ -160,6 +161,12 @@ def test_to_parquet_new_file(monkeypatch, cleared_fs): ) +@pytest.mark.xfail( + is_platform_mac(), + raises=ImportError, + reason="Raises Library not loaded: @rpath/libssl.1.1.dylib in CI", + strict=False, +) @td.skip_if_no("pyarrow", min_version="2") def test_arrowparquet_options(fsspectest): """Regression test for writing to a not-yet-existent GCS Parquet file.""" diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index b57923093e3e8..158658d125794 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -13,7 +13,10 @@ from pandas._config import get_option -from pandas.compat import is_platform_windows +from pandas.compat import ( + is_platform_mac, + is_platform_windows, +) from pandas.compat.pyarrow import ( pa_version_under2p0, pa_version_under5p0, @@ -255,6 +258,12 @@ def test_invalid_engine(df_compat): check_round_trip(df_compat, "foo", "bar") +@pytest.mark.xfail( + is_platform_mac(), + raises=ImportError, + reason="Raises Library not loaded: @rpath/libssl.1.1.dylib in CI", + strict=False, +) def test_options_py(df_compat, pa): # use the set option @@ -276,6 +285,12 @@ def test_options_auto(df_compat, fp, pa): check_round_trip(df_compat) +@pytest.mark.xfail( + is_platform_mac(), + raises=ImportError, + reason="Raises Library not loaded: @rpath/libssl.1.1.dylib in CI", + strict=False, +) def test_options_get_engine(fp, pa): assert isinstance(get_engine("pyarrow"), PyArrowImpl) assert isinstance(get_engine("fastparquet"), FastParquetImpl) @@ -339,6 +354,12 @@ def test_get_engine_auto_error_message(): get_engine("auto") +@pytest.mark.xfail( + is_platform_mac(), + raises=ImportError, + reason="Raises Library not loaded: @rpath/libssl.1.1.dylib in CI", + strict=False, +) def test_cross_engine_pa_fp(df_cross_compat, pa, fp): # cross-compat with differing reading/writing engines @@ -393,6 +414,12 @@ def test_parquet_read_from_url(self, df_compat, engine): tm.assert_frame_equal(df, df_compat) +@pytest.mark.xfail( + is_platform_mac(), + raises=ImportError, + reason="Raises Library not loaded: @rpath/libssl.1.1.dylib in CI", + strict=False, +) class TestBasic(Base): def test_error(self, engine): for obj in [ @@ -668,6 +695,12 @@ def test_read_empty_array(self, pa, dtype): check_round_trip(df, pa, read_kwargs={"use_nullable_dtypes": True}) +@pytest.mark.xfail( + is_platform_mac(), + raises=ImportError, + reason="Raises Library not loaded: @rpath/libssl.1.1.dylib in CI", + strict=False, +) @pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:DeprecationWarning") class TestParquetPyArrow(Base): def test_basic(self, pa, df_full): From 80a73a9329dbecbbfa2a7f01ff879a4c3ea2b434 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 31 Jan 2022 19:26:55 -0800 Subject: [PATCH 61/81] Xfail for flaky macos pyarrow parquet thing --- pandas/tests/io/test_common.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 1fd540aba5c07..92fffe0d0406d 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -242,10 +242,7 @@ def test_write_missing_parent_directory(self, method, module, error_class, fn_ex r"Cannot save file into a non-existent directory: .*missing_folder", ] ) - with pytest.raises( - error_class, - match=msg, - ): + with pytest.raises(error_class, match=msg): method(dummy_frame, path) @pytest.mark.parametrize( From f5a76aedd30773fec3695a48a7da92c11e6b1b58 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 31 Jan 2022 21:24:27 -0800 Subject: [PATCH 62/81] Address more pyarrow compat --- pandas/tests/io/test_common.py | 7 ++++++- pandas/tests/io/test_parquet.py | 10 ++++++++-- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 92fffe0d0406d..4eeef005366e3 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -222,7 +222,12 @@ def test_read_non_existent(self, reader, module, error_class, fn_ext): (pd.DataFrame.to_html, "os", OSError, "html"), (pd.DataFrame.to_excel, "xlrd", OSError, "xlsx"), (pd.DataFrame.to_feather, "pyarrow", OSError, "feather"), - (pd.DataFrame.to_parquet, "pyarrow", OSError, "parquet"), + ( + pd.DataFrame.to_parquet, + "pyarrow", + (OSError, FileNotFoundError), + "parquet", + ), (pd.DataFrame.to_stata, "os", OSError, "dta"), (pd.DataFrame.to_json, "os", OSError, "json"), (pd.DataFrame.to_pickle, "os", OSError, "pickle"), diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 158658d125794..738c78e3bc1a5 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -374,7 +374,13 @@ def test_cross_engine_pa_fp(df_cross_compat, pa, fp): tm.assert_frame_equal(result, df[["a", "d"]]) -def test_cross_engine_fp_pa(request, df_cross_compat, pa, fp): +@pytest.mark.xfail( + is_platform_mac(), + raises=ImportError, + reason="Raises Library not loaded: @rpath/libssl.1.1.dylib in CI", + strict=False, +) +def test_cross_engine_fp_pa(df_cross_compat, pa, fp): # cross-compat with differing reading/writing engines df = df_cross_compat with tm.ensure_clean() as path: @@ -868,7 +874,7 @@ def test_s3_roundtrip_for_dir( repeat=1, ) - @td.skip_if_no("pyarrow") + @td.skip_if_no("pyarrow", min_version="3") def test_read_file_like_obj_support(self, df_compat): buffer = BytesIO() df_compat.to_parquet(buffer) From f7b5a7553d235e5167f2a88669d921ff09a9cbe2 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Mon, 31 Jan 2022 23:44:07 -0800 Subject: [PATCH 63/81] Escape the brackets --- pandas/tests/io/test_common.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 4eeef005366e3..78b9b2c2d2504 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -241,10 +241,10 @@ def test_write_missing_parent_directory(self, method, module, error_class, fn_ex path = os.path.join(HERE, "data", "missing_folder", "does_not_exist." + fn_ext) - msg = r"|".join( + msg = "|".join( [ - r"[Errno 2] No such", - r"Cannot save file into a non-existent directory: .*missing_folder", + r"\[Errno 2\] No such", + "Cannot save file into a non-existent directory: .*missing_folder", ] ) with pytest.raises(error_class, match=msg): From 9b660faa164beb385d1bdab359b238dc1342d160 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 1 Feb 2022 08:59:58 -0800 Subject: [PATCH 64/81] Try using the old OS again --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 21cb5bac25be8..17a9397a25f3e 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -25,7 +25,7 @@ jobs: - template: ci/azure/posix.yml parameters: name: macOS - vmImage: macOS-latest + vmImage: macOS-10.15 - template: ci/azure/windows.yml parameters: From 2be094e3b03d588f69a319ad5a199caf147def1f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 1 Feb 2022 14:44:23 -0800 Subject: [PATCH 65/81] Add back latest --- azure-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 17a9397a25f3e..21cb5bac25be8 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -25,7 +25,7 @@ jobs: - template: ci/azure/posix.yml parameters: name: macOS - vmImage: macOS-10.15 + vmImage: macOS-latest - template: ci/azure/windows.yml parameters: From 8177c2230290478e695414c343b89831b7c567e4 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 1 Feb 2022 14:49:17 -0800 Subject: [PATCH 66/81] Check arraymanager tests --- ci/run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 3d3119f10e3e7..786bb0c6b6150 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -31,7 +31,7 @@ sh -c "$PYTEST_CMD" if [[ "$PANDAS_DATA_MANAGER" != "array" ]]; then # The ArrayManager tests should have already been run by PYTEST_CMD if PANDAS_DATA_MANAGER was already set to array - PYTEST_AM_CMD="PANDAS_DATA_MANAGER=array pytest -m \"$PATTERN and arraymanager\" -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE pandas" + PYTEST_AM_CMD="PANDAS_DATA_MANAGER=array pytest -v -m \"$PATTERN and arraymanager\" -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE pandas" echo $PYTEST_AM_CMD sh -c "$PYTEST_AM_CMD" From feab7eed93b9a8645bc34b66511186a50613289e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Tue, 1 Feb 2022 18:48:51 -0800 Subject: [PATCH 67/81] mRevert and recover the issue again --- pandas/tests/io/test_common.py | 18 ++++--------- pandas/tests/io/test_fsspec.py | 7 ----- pandas/tests/io/test_parquet.py | 45 +++------------------------------ 3 files changed, 8 insertions(+), 62 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index 2397ff0d5646d..adf4f32837acf 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -222,12 +222,7 @@ def test_read_non_existent(self, reader, module, error_class, fn_ext): (pd.DataFrame.to_html, "os", OSError, "html"), (pd.DataFrame.to_excel, "xlrd", OSError, "xlsx"), (pd.DataFrame.to_feather, "pyarrow", OSError, "feather"), - ( - pd.DataFrame.to_parquet, - "pyarrow", - (OSError, FileNotFoundError), - "parquet", - ), + (pd.DataFrame.to_parquet, "pyarrow", OSError, "parquet"), (pd.DataFrame.to_stata, "os", OSError, "dta"), (pd.DataFrame.to_json, "os", OSError, "json"), (pd.DataFrame.to_pickle, "os", OSError, "pickle"), @@ -241,13 +236,10 @@ def test_write_missing_parent_directory(self, method, module, error_class, fn_ex path = os.path.join(HERE, "data", "missing_folder", "does_not_exist." + fn_ext) - msg = "|".join( - [ - r"\[Errno 2\] No such", - "Cannot save file into a non-existent directory: .*missing_folder", - ] - ) - with pytest.raises(error_class, match=msg): + with pytest.raises( + error_class, + match=r"Cannot save file into a non-existent directory: .*missing_folder", + ): method(dummy_frame, path) @pytest.mark.parametrize( diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index c2c8db4cd7160..f1040c0bd30f2 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -3,7 +3,6 @@ import numpy as np import pytest -from pandas.compat import is_platform_mac from pandas.compat._optional import VERSIONS from pandas import ( @@ -161,12 +160,6 @@ def test_to_parquet_new_file(monkeypatch, cleared_fs): ) -@pytest.mark.xfail( - is_platform_mac(), - raises=ImportError, - reason="Raises Library not loaded: @rpath/libssl.1.1.dylib in CI", - strict=False, -) @td.skip_if_no("pyarrow", min_version="2") def test_arrowparquet_options(fsspectest): """Regression test for writing to a not-yet-existent GCS Parquet file.""" diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 738c78e3bc1a5..b57923093e3e8 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -13,10 +13,7 @@ from pandas._config import get_option -from pandas.compat import ( - is_platform_mac, - is_platform_windows, -) +from pandas.compat import is_platform_windows from pandas.compat.pyarrow import ( pa_version_under2p0, pa_version_under5p0, @@ -258,12 +255,6 @@ def test_invalid_engine(df_compat): check_round_trip(df_compat, "foo", "bar") -@pytest.mark.xfail( - is_platform_mac(), - raises=ImportError, - reason="Raises Library not loaded: @rpath/libssl.1.1.dylib in CI", - strict=False, -) def test_options_py(df_compat, pa): # use the set option @@ -285,12 +276,6 @@ def test_options_auto(df_compat, fp, pa): check_round_trip(df_compat) -@pytest.mark.xfail( - is_platform_mac(), - raises=ImportError, - reason="Raises Library not loaded: @rpath/libssl.1.1.dylib in CI", - strict=False, -) def test_options_get_engine(fp, pa): assert isinstance(get_engine("pyarrow"), PyArrowImpl) assert isinstance(get_engine("fastparquet"), FastParquetImpl) @@ -354,12 +339,6 @@ def test_get_engine_auto_error_message(): get_engine("auto") -@pytest.mark.xfail( - is_platform_mac(), - raises=ImportError, - reason="Raises Library not loaded: @rpath/libssl.1.1.dylib in CI", - strict=False, -) def test_cross_engine_pa_fp(df_cross_compat, pa, fp): # cross-compat with differing reading/writing engines @@ -374,13 +353,7 @@ def test_cross_engine_pa_fp(df_cross_compat, pa, fp): tm.assert_frame_equal(result, df[["a", "d"]]) -@pytest.mark.xfail( - is_platform_mac(), - raises=ImportError, - reason="Raises Library not loaded: @rpath/libssl.1.1.dylib in CI", - strict=False, -) -def test_cross_engine_fp_pa(df_cross_compat, pa, fp): +def test_cross_engine_fp_pa(request, df_cross_compat, pa, fp): # cross-compat with differing reading/writing engines df = df_cross_compat with tm.ensure_clean() as path: @@ -420,12 +393,6 @@ def test_parquet_read_from_url(self, df_compat, engine): tm.assert_frame_equal(df, df_compat) -@pytest.mark.xfail( - is_platform_mac(), - raises=ImportError, - reason="Raises Library not loaded: @rpath/libssl.1.1.dylib in CI", - strict=False, -) class TestBasic(Base): def test_error(self, engine): for obj in [ @@ -701,12 +668,6 @@ def test_read_empty_array(self, pa, dtype): check_round_trip(df, pa, read_kwargs={"use_nullable_dtypes": True}) -@pytest.mark.xfail( - is_platform_mac(), - raises=ImportError, - reason="Raises Library not loaded: @rpath/libssl.1.1.dylib in CI", - strict=False, -) @pytest.mark.filterwarnings("ignore:CategoricalBlock is deprecated:DeprecationWarning") class TestParquetPyArrow(Base): def test_basic(self, pa, df_full): @@ -874,7 +835,7 @@ def test_s3_roundtrip_for_dir( repeat=1, ) - @td.skip_if_no("pyarrow", min_version="3") + @td.skip_if_no("pyarrow") def test_read_file_like_obj_support(self, df_compat): buffer = BytesIO() df_compat.to_parquet(buffer) From 11ad11d3beee92a9aef75498d7a693b30a9716f8 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 2 Feb 2022 10:37:54 -0800 Subject: [PATCH 68/81] Change number of workers --- azure-pipelines.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 21cb5bac25be8..30534ac27f42d 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -16,7 +16,8 @@ pr: - 1.4.x variables: - PYTEST_WORKERS: auto + # Not auto to (hopefully) stabilize threading timeouts from pyarrow + PYTEST_WORKERS: 2 PYTEST_TARGET: pandas PATTERN: "not slow and not high_memory and not db and not network" PANDAS_CI: 1 From c69674017a2ff136913512bf39eece91df8a957b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 2 Feb 2022 11:02:27 -0800 Subject: [PATCH 69/81] Try passing use_threads in pyarrow again --- pandas/tests/io/parser/conftest.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index b2d2be362d0d3..07ecff419d2bd 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -12,6 +12,9 @@ ) import pandas._testing as tm +from pandas.io.parsers import readers +from pandas.io.parsers.arrow_parser_wrapper import ArrowParserWrapper + class BaseParser: engine: str | None = None @@ -101,20 +104,24 @@ def csv1(datapath): @pytest.fixture(params=_all_parsers, ids=_all_parser_ids) -def all_parsers(request): +def all_parsers(request, monkeypatch): """ Fixture all of the CSV parsers. """ parser = request.param() if parser.engine == "pyarrow": pytest.importorskip("pyarrow", VERSIONS["pyarrow"]) - # Try setting num cpus to 1 to avoid hangs on Azure MacOS/Windows builds - # or better yet find a way to disable threads - # TODO(GH#44584) pytest.mark.single these tests - import pyarrow - pyarrow.set_cpu_count(1) - return parser + class NoThreadArrowParserWrapper(ArrowParserWrapper): + def _get_pyarrow_options(self): + super()._get_pyarrow_options() + self.read_options["use_threads"] = False + + with monkeypatch.context() as m: + m.setattr(readers, "ArrowParserWrapper", NoThreadArrowParserWrapper) + yield parser + else: + yield parser @pytest.fixture(params=_c_parsers_only, ids=_c_parser_ids) From 985542b7ce9a3570db037851df162cab238453e5 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 2 Feb 2022 14:14:03 -0800 Subject: [PATCH 70/81] Revert "Try passing use_threads in pyarrow again" This reverts commit c69674017a2ff136913512bf39eece91df8a957b. --- pandas/tests/io/parser/conftest.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index 07ecff419d2bd..b2d2be362d0d3 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -12,9 +12,6 @@ ) import pandas._testing as tm -from pandas.io.parsers import readers -from pandas.io.parsers.arrow_parser_wrapper import ArrowParserWrapper - class BaseParser: engine: str | None = None @@ -104,24 +101,20 @@ def csv1(datapath): @pytest.fixture(params=_all_parsers, ids=_all_parser_ids) -def all_parsers(request, monkeypatch): +def all_parsers(request): """ Fixture all of the CSV parsers. """ parser = request.param() if parser.engine == "pyarrow": pytest.importorskip("pyarrow", VERSIONS["pyarrow"]) + # Try setting num cpus to 1 to avoid hangs on Azure MacOS/Windows builds + # or better yet find a way to disable threads + # TODO(GH#44584) pytest.mark.single these tests + import pyarrow - class NoThreadArrowParserWrapper(ArrowParserWrapper): - def _get_pyarrow_options(self): - super()._get_pyarrow_options() - self.read_options["use_threads"] = False - - with monkeypatch.context() as m: - m.setattr(readers, "ArrowParserWrapper", NoThreadArrowParserWrapper) - yield parser - else: - yield parser + pyarrow.set_cpu_count(1) + return parser @pytest.fixture(params=_c_parsers_only, ids=_c_parser_ids) From 84ed1fb5d0f99078a927ac193f1d1b8237815b5f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 2 Feb 2022 14:14:15 -0800 Subject: [PATCH 71/81] Revert "Change number of workers" This reverts commit 11ad11d3beee92a9aef75498d7a693b30a9716f8. --- azure-pipelines.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 30534ac27f42d..21cb5bac25be8 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -16,8 +16,7 @@ pr: - 1.4.x variables: - # Not auto to (hopefully) stabilize threading timeouts from pyarrow - PYTEST_WORKERS: 2 + PYTEST_WORKERS: auto PYTEST_TARGET: pandas PATTERN: "not slow and not high_memory and not db and not network" PANDAS_CI: 1 From d41fa2636de7402edc33600cbca98780297970c1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Wed, 2 Feb 2022 14:39:03 -0800 Subject: [PATCH 72/81] Try reinstalling pyarrow again --- ci/azure/posix.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 71cb62117b0e3..f34cf221c8297 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -27,7 +27,10 @@ jobs: # Re-install pyarrow for occasional pyarrow-parquet error # Library not loaded: @rpath/libssl.1.1.dylib - - script: ci/setup_env.sh + - script: | + ci/setup_env.sh + conda remove -n pandas-dev --yes pyarrow + conda install -n pandas-dev -c conda-forge pyarrow displayName: 'Setup environment and build pandas' - script: | From 777c977fe764bdf711a104961d4b5af78269ee00 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 3 Feb 2022 12:14:00 -0800 Subject: [PATCH 73/81] Try upgrading pyarrow instead --- ci/azure/posix.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index f34cf221c8297..2074af310edc4 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -25,12 +25,11 @@ jobs: - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin' displayName: 'Set conda path' - # Re-install pyarrow for occasional pyarrow-parquet error + # Upgrade pyarrow for occasional pyarrow-parquet error # Library not loaded: @rpath/libssl.1.1.dylib - script: | ci/setup_env.sh - conda remove -n pandas-dev --yes pyarrow - conda install -n pandas-dev -c conda-forge pyarrow + conda update -n pandas-dev --yes pyarrow displayName: 'Setup environment and build pandas' - script: | From 9cbc5d3b6c85667b5e7244b8bdd5f85a92e3b86c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 3 Feb 2022 13:57:06 -0800 Subject: [PATCH 74/81] Remove fastparquet from macos --- ci/azure/posix.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index 2074af310edc4..f85776f1e40af 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -25,11 +25,11 @@ jobs: - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin' displayName: 'Set conda path' - # Upgrade pyarrow for occasional pyarrow-parquet error + # Remove fastparquet for occasional pyarrow-parquet error # Library not loaded: @rpath/libssl.1.1.dylib - script: | ci/setup_env.sh - conda update -n pandas-dev --yes pyarrow + conda remove -n pandas-dev --yes fastparquet displayName: 'Setup environment and build pandas' - script: | From b994e24c300e14ce40ee151f7e3ad36e91231391 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 3 Feb 2022 15:16:06 -0800 Subject: [PATCH 75/81] specify conda forge? --- ci/azure/posix.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index f85776f1e40af..deb1ea894f9ae 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -29,7 +29,7 @@ jobs: # Library not loaded: @rpath/libssl.1.1.dylib - script: | ci/setup_env.sh - conda remove -n pandas-dev --yes fastparquet + conda remove -n pandas-dev -c conda-forge --yes fastparquet displayName: 'Setup environment and build pandas' - script: | From 41b17a066fc31fefb5e60f240ccb5830637c3958 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 3 Feb 2022 16:50:48 -0800 Subject: [PATCH 76/81] Try upgrading pyarrow again --- ci/azure/posix.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index deb1ea894f9ae..c3b5e0024cd40 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -25,11 +25,11 @@ jobs: - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin' displayName: 'Set conda path' - # Remove fastparquet for occasional pyarrow-parquet error + # Upgrade pyarrow for occasional pyarrow-parquet error # Library not loaded: @rpath/libssl.1.1.dylib - script: | ci/setup_env.sh - conda remove -n pandas-dev -c conda-forge --yes fastparquet + conda upgrade -n pandas-dev -c conda-forge --yes pyarrow displayName: 'Setup environment and build pandas' - script: | From 13742d75b94fead348cdfe7e5d08e3524368c9c7 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Thu, 3 Feb 2022 19:50:25 -0800 Subject: [PATCH 77/81] Remove fastparquet --- ci/azure/posix.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index c3b5e0024cd40..deb1ea894f9ae 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -25,11 +25,11 @@ jobs: - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin' displayName: 'Set conda path' - # Upgrade pyarrow for occasional pyarrow-parquet error + # Remove fastparquet for occasional pyarrow-parquet error # Library not loaded: @rpath/libssl.1.1.dylib - script: | ci/setup_env.sh - conda upgrade -n pandas-dev -c conda-forge --yes pyarrow + conda remove -n pandas-dev -c conda-forge --yes fastparquet displayName: 'Setup environment and build pandas' - script: | From 8852ae45a1041352a7c1864f75c4d6bdffd6fd29 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 4 Feb 2022 10:38:00 -0800 Subject: [PATCH 78/81] Force uninstall --- ci/azure/posix.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index deb1ea894f9ae..d4e6dbdb0eb15 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -29,7 +29,7 @@ jobs: # Library not loaded: @rpath/libssl.1.1.dylib - script: | ci/setup_env.sh - conda remove -n pandas-dev -c conda-forge --yes fastparquet + conda remove -n pandas-dev -c conda-forge --force --yes fastparquet displayName: 'Setup environment and build pandas' - script: | From 3e7e3bcc7b077874603868b166339fb48a782f79 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Fri, 4 Feb 2022 14:47:27 -0800 Subject: [PATCH 79/81] Use an xfail --- ci/azure/posix.yml | 6 +----- pandas/tests/io/test_parquet.py | 19 +++++++++++++++---- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/ci/azure/posix.yml b/ci/azure/posix.yml index d4e6dbdb0eb15..be76e1366f5ca 100644 --- a/ci/azure/posix.yml +++ b/ci/azure/posix.yml @@ -25,11 +25,7 @@ jobs: - script: echo '##vso[task.prependpath]$(HOME)/miniconda3/bin' displayName: 'Set conda path' - # Remove fastparquet for occasional pyarrow-parquet error - # Library not loaded: @rpath/libssl.1.1.dylib - - script: | - ci/setup_env.sh - conda remove -n pandas-dev -c conda-forge --force --yes fastparquet + - script: ci/setup_env.sh displayName: 'Setup environment and build pandas' - script: | diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index b57923093e3e8..215c5c494df27 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -13,7 +13,10 @@ from pandas._config import get_option -from pandas.compat import is_platform_windows +from pandas.compat import ( + is_platform_mac, + is_platform_windows, +) from pandas.compat.pyarrow import ( pa_version_under2p0, pa_version_under5p0, @@ -54,9 +57,17 @@ _HAVE_FASTPARQUET = False -pytestmark = pytest.mark.filterwarnings( - "ignore:RangeIndex.* is deprecated:DeprecationWarning" -) +pytestmark = [ + pytest.mark.filterwarnings("ignore:RangeIndex.* is deprecated:DeprecationWarning"), + pytest.mark.xfail( + is_platform_mac(), + reason="Incorrect build can lead to " + "dlopen(.../pyarrow/_parquet.cpython-39-darwin.so, 2): " + "Library not loaded: @rpath/libssl.1.1.dylib", + raises=ImportError, + strict=False, + ), +] # TODO(ArrayManager) fastparquet relies on BlockManager internals From 311dd9cbff81d6fd99b5bacc688b065b519e7b6a Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sat, 5 Feb 2022 11:25:52 -0800 Subject: [PATCH 80/81] Address some mac test --- pandas/tests/io/test_common.py | 11 +++++++---- pandas/tests/io/test_fsspec.py | 9 +++++++++ pandas/tests/io/test_parquet.py | 12 ++++++++++-- 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/test_common.py b/pandas/tests/io/test_common.py index adf4f32837acf..844eb5521208a 100644 --- a/pandas/tests/io/test_common.py +++ b/pandas/tests/io/test_common.py @@ -236,10 +236,13 @@ def test_write_missing_parent_directory(self, method, module, error_class, fn_ex path = os.path.join(HERE, "data", "missing_folder", "does_not_exist." + fn_ext) - with pytest.raises( - error_class, - match=r"Cannot save file into a non-existent directory: .*missing_folder", - ): + msg = "|".join( + [ + r"\[Errno 2\] No such", + "Cannot save file into a non-existent directory: .*missing_folder", + ] + ) + with pytest.raises(error_class, match=msg): method(dummy_frame, path) @pytest.mark.parametrize( diff --git a/pandas/tests/io/test_fsspec.py b/pandas/tests/io/test_fsspec.py index f1040c0bd30f2..c3f2a0c176b78 100644 --- a/pandas/tests/io/test_fsspec.py +++ b/pandas/tests/io/test_fsspec.py @@ -3,6 +3,7 @@ import numpy as np import pytest +from pandas.compat import is_platform_mac from pandas.compat._optional import VERSIONS from pandas import ( @@ -161,6 +162,14 @@ def test_to_parquet_new_file(monkeypatch, cleared_fs): @td.skip_if_no("pyarrow", min_version="2") +@pytest.mark.xfail( + is_platform_mac(), + reason="Incorrect build can lead to " + "dlopen(.../pyarrow/_parquet.cpython-39-darwin.so, 2): " + "Library not loaded: @rpath/libssl.1.1.dylib", + raises=ImportError, + strict=False, +) def test_arrowparquet_options(fsspectest): """Regression test for writing to a not-yet-existent GCS Parquet file.""" df = DataFrame({"a": [0]}) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index 215c5c494df27..b9acd5ebbd5f6 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -14,6 +14,7 @@ from pandas._config import get_option from pandas.compat import ( + PY39, is_platform_mac, is_platform_windows, ) @@ -182,6 +183,7 @@ def check_round_trip( check_names=True, check_like=False, check_dtype=True, + check_index_type="equiv", repeat=2, ): """Verify parquet serializer and deserializer produce the same results. @@ -228,6 +230,7 @@ def compare(repeat): check_names=check_names, check_like=check_like, check_dtype=check_dtype, + check_index_type=check_index_type, ) if path is None: @@ -847,6 +850,11 @@ def test_s3_roundtrip_for_dir( ) @td.skip_if_no("pyarrow") + @pytest.mark.xfail( + is_platform_mac() and PY39, + raises=TypeError, + reason="expected str, bytes or os.PathLike object, not BytesIO", + ) def test_read_file_like_obj_support(self, df_compat): buffer = BytesIO() df_compat.to_parquet(buffer) @@ -969,7 +977,7 @@ def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): df = pd.DataFrame(index=idx, data={"index_as_col": idx}) # see gh-36004 - # compare time(zone) values only, skip their class: + # compare time(zone) values only, skip their class in the values and index: # pyarrow always creates fixed offset timezones using pytz.FixedOffset() # even if it was datetime.timezone() originally # @@ -977,7 +985,7 @@ def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): # they both implement datetime.tzinfo # they both wrap datetime.timedelta() # this use-case sets the resolution to 1 minute - check_round_trip(df, pa, check_dtype=False) + check_round_trip(df, pa, check_dtype=False, check_index_type=False) @td.skip_if_no("pyarrow", min_version="1.0.0") def test_filter_row_groups(self, pa): From ae8f616fc07f0e309cb57efdcbacc9e1b534b473 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke Date: Sun, 6 Feb 2022 09:19:26 -0800 Subject: [PATCH 81/81] Fix more tests --- pandas/tests/io/test_parquet.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/tests/io/test_parquet.py b/pandas/tests/io/test_parquet.py index b9acd5ebbd5f6..cbf390947d0ec 100644 --- a/pandas/tests/io/test_parquet.py +++ b/pandas/tests/io/test_parquet.py @@ -3,6 +3,7 @@ from io import BytesIO import os import pathlib +import sys from warnings import ( catch_warnings, filterwarnings, @@ -14,7 +15,6 @@ from pandas._config import get_option from pandas.compat import ( - PY39, is_platform_mac, is_platform_windows, ) @@ -851,7 +851,7 @@ def test_s3_roundtrip_for_dir( @td.skip_if_no("pyarrow") @pytest.mark.xfail( - is_platform_mac() and PY39, + is_platform_mac() and sys.version_info[:2] == (3, 9), raises=TypeError, reason="expected str, bytes or os.PathLike object, not BytesIO", ) @@ -966,6 +966,7 @@ def test_timezone_aware_index(self, request, pa, timezone_aware_date_list): if ( not pa_version_under2p0 and timezone_aware_date_list.tzinfo != datetime.timezone.utc + and not is_platform_windows() ): request.node.add_marker( pytest.mark.xfail(