From 0bed8b5a709e70b93558e8c118d46835f126a9bd Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 28 Jun 2023 17:01:11 -0700 Subject: [PATCH 1/7] TST: Misc testing cleanups --- ci/code_checks.sh | 17 ----------------- ci/run_tests.sh | 12 +----------- pandas/tests/groupby/test_timegrouper.py | 1 + pandas/tests/io/parser/test_network.py | 5 ----- pandas/tests/io/test_gcs.py | 8 ++++---- 5 files changed, 6 insertions(+), 37 deletions(-) diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 3927b91fe05c0..b1772e2873ee9 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -21,23 +21,6 @@ BASE_DIR="$(dirname $0)/.." RET=0 CHECK=$1 -function invgrep { - # grep with inverse exist status and formatting for azure-pipelines - # - # This function works exactly as grep, but with opposite exit status: - # - 0 (success) when no patterns are found - # - 1 (fail) when the patterns are found - # - # This is useful for the CI, as we want to fail if one of the patterns - # that we want to avoid is found by grep. - grep -n "$@" | sed "s/^/$INVGREP_PREPEND/" | sed "s/$/$INVGREP_APPEND/" ; EXIT_STATUS=${PIPESTATUS[0]} - return $((! $EXIT_STATUS)) -} - -if [[ "$GITHUB_ACTIONS" == "true" ]]; then - INVGREP_PREPEND="##[error]" -fi - ### CODE ### if [[ -z "$CHECK" || "$CHECK" == "code" ]]; then diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 42caebc19e176..54e41ea449848 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -8,19 +8,9 @@ export PYTHONHASHSEED=$(python -c 'import random; print(random.randint(1, 429496 # May help reproduce flaky CI builds if set in subsequent runs echo PYTHONHASHSEED=$PYTHONHASHSEED -if [[ "not network" == *"$PATTERN"* ]]; then - export http_proxy=http://1.2.3.4 https_proxy=http://1.2.3.4; -fi - COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml" -# If no X server is found, we use xvfb to emulate it -if [[ $(uname) == "Linux" && -z $DISPLAY ]]; then - export DISPLAY=":0" - XVFB="xvfb-run " -fi - -PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 ${XVFB}pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" +PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" if [[ "$PATTERN" ]]; then PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\"" diff --git a/pandas/tests/groupby/test_timegrouper.py b/pandas/tests/groupby/test_timegrouper.py index cfbecd3efd07e..04b99939514e6 100644 --- a/pandas/tests/groupby/test_timegrouper.py +++ b/pandas/tests/groupby/test_timegrouper.py @@ -907,6 +907,7 @@ def test_groupby_apply_timegrouper_with_nat_apply_squeeze( tm.assert_frame_equal(res, expected) @td.skip_if_no("numba") + @pytest.mark.single_cpu def test_groupby_agg_numba_timegrouper_with_nat( self, groupby_with_truncated_bingrouper ): diff --git a/pandas/tests/io/parser/test_network.py b/pandas/tests/io/parser/test_network.py index ba0307cf5111e..ea89d9cf42824 100644 --- a/pandas/tests/io/parser/test_network.py +++ b/pandas/tests/io/parser/test_network.py @@ -74,11 +74,6 @@ def tips_df(datapath): @pytest.mark.single_cpu @pytest.mark.usefixtures("s3_resource") -@pytest.mark.xfail( - reason="CI race condition GH 45433, GH 44584", - raises=FileNotFoundError, - strict=False, -) @td.skip_if_not_us_locale() class TestS3: @td.skip_if_no("s3fs") diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index bdea24f7bb5aa..9a2380166dd0f 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -22,7 +22,7 @@ @pytest.fixture def gcs_buffer(): """Emulate GCS using a binary buffer.""" - import fsspec + fsspec = pytest.importorskip("fsspec") gcs_buffer = BytesIO() gcs_buffer.close = lambda: True @@ -43,7 +43,6 @@ def ls(self, path, **kwargs): return gcs_buffer -@td.skip_if_no("gcsfs") # Patches pyarrow; other processes should not pick up change @pytest.mark.single_cpu @pytest.mark.parametrize("format", ["csv", "json", "parquet", "excel", "markdown"]) @@ -53,6 +52,7 @@ def test_to_read_gcs(gcs_buffer, format, monkeypatch, capsys): GH 33987 """ + pytest.importorskip("gcsfs") df1 = DataFrame( { @@ -131,7 +131,6 @@ def assert_equal_zip_safe(result: bytes, expected: bytes, compression: str): assert result == expected -@td.skip_if_no("gcsfs") @pytest.mark.parametrize("encoding", ["utf-8", "cp1251"]) def test_to_csv_compression_encoding_gcs( gcs_buffer, compression_only, encoding, compression_to_extension @@ -142,6 +141,7 @@ def test_to_csv_compression_encoding_gcs( GH 35677 (to_csv, compression), GH 26124 (to_csv, encoding), and GH 32392 (read_csv, encoding) """ + pytest.importorskip("gcsfs") df = tm.makeDataFrame() # reference of compressed and encoded file @@ -178,9 +178,9 @@ def test_to_csv_compression_encoding_gcs( @td.skip_if_no("fastparquet") -@td.skip_if_no("gcsfs") def test_to_parquet_gcs_new_file(monkeypatch, tmpdir): """Regression test for writing to a not-yet-existent GCS Parquet file.""" + pytest.importorskip("gcsfs") from fsspec import AbstractFileSystem df1 = DataFrame( From f2e862eab37b3e43f501adcc2dcbe25ed5017b3c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 28 Jun 2023 17:32:56 -0700 Subject: [PATCH 2/7] more cleans --- .github/workflows/unit-tests.yml | 4 ++-- pandas/_testing/_io.py | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index bd104af4a8d9e..f61f3be6be88b 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -81,7 +81,7 @@ jobs: ENV_FILE: ci/deps/${{ matrix.env_file }} PATTERN: ${{ matrix.pattern }} EXTRA_APT: ${{ matrix.extra_apt || '' }} - LANG: ${{ matrix.lang || '' }} + LANG: ${{ matrix.lang || 'C.UTF-8' }} LC_ALL: ${{ matrix.lc_all || '' }} PANDAS_COPY_ON_WRITE: ${{ matrix.pandas_copy_on_write || '0' }} PANDAS_CI: ${{ matrix.pandas_ci || '1' }} @@ -122,7 +122,7 @@ jobs: - 5432:5432 moto: - image: motoserver/moto:4.1.4 + image: motoserver/moto:4.1.12 env: AWS_ACCESS_KEY_ID: foobar_key AWS_SECRET_ACCESS_KEY: foobar_secret diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py index fa0bc58a132d4..8384974a6bc6f 100644 --- a/pandas/_testing/_io.py +++ b/pandas/_testing/_io.py @@ -3,6 +3,7 @@ import bz2 import gzip import io +import pathlib import tarfile from typing import ( TYPE_CHECKING, @@ -77,9 +78,7 @@ def round_trip_pathlib(writer, reader, path: str | None = None): pandas object The original object that was serialized and then re-read. """ - import pytest - - Path = pytest.importorskip("pathlib").Path + Path = pathlib.Path if path is None: path = "___pathlib___" with ensure_clean(path) as path: From ea9f3c2f0864aaa81492112efa3d1c2a2dd0dc24 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 28 Jun 2023 17:40:36 -0700 Subject: [PATCH 3/7] pytest updates --- ci/run_tests.sh | 2 +- pyproject.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 54e41ea449848..4b66746f73bb4 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -10,7 +10,7 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml" -PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" +PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile --tmp_path_retention_policy "none" $TEST_ARGS $COVERAGE $PYTEST_TARGET" if [[ "$PATTERN" ]]; then PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\"" diff --git a/pyproject.toml b/pyproject.toml index ef257b3143598..1ca0aaa33e179 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -453,7 +453,7 @@ disable = [ [tool.pytest.ini_options] # sync minversion with pyproject.toml & install.rst -minversion = "7.0" +minversion = "7.3.2" addopts = "--strict-data-files --strict-markers --strict-config --capture=no --durations=30 --junitxml=test-data.xml" empty_parameter_set_mark = "fail_at_collect" xfail_strict = true From de45eb87c87593f7ff5cac10ca2cc0594efecc00 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 29 Jun 2023 11:27:45 -0700 Subject: [PATCH 4/7] trigger ci From 9564d9aa18770038c0ef5aa873067957975c5182 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 29 Jun 2023 11:41:15 -0700 Subject: [PATCH 5/7] Remove tmp_file removal --- ci/run_tests.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/run_tests.sh b/ci/run_tests.sh index 4b66746f73bb4..54e41ea449848 100755 --- a/ci/run_tests.sh +++ b/ci/run_tests.sh @@ -10,7 +10,7 @@ echo PYTHONHASHSEED=$PYTHONHASHSEED COVERAGE="-s --cov=pandas --cov-report=xml --cov-append --cov-config=pyproject.toml" -PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile --tmp_path_retention_policy "none" $TEST_ARGS $COVERAGE $PYTEST_TARGET" +PYTEST_CMD="MESONPY_EDITABLE_VERBOSE=1 pytest -r fEs -n $PYTEST_WORKERS --dist=loadfile $TEST_ARGS $COVERAGE $PYTEST_TARGET" if [[ "$PATTERN" ]]; then PYTEST_CMD="$PYTEST_CMD -m \"$PATTERN\"" From e5c659826a557ca3123ebb673c7c14217b50fc03 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 29 Jun 2023 13:34:11 -0700 Subject: [PATCH 6/7] Revert gcs testing --- pandas/tests/io/test_gcs.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/test_gcs.py b/pandas/tests/io/test_gcs.py index 9a2380166dd0f..bdea24f7bb5aa 100644 --- a/pandas/tests/io/test_gcs.py +++ b/pandas/tests/io/test_gcs.py @@ -22,7 +22,7 @@ @pytest.fixture def gcs_buffer(): """Emulate GCS using a binary buffer.""" - fsspec = pytest.importorskip("fsspec") + import fsspec gcs_buffer = BytesIO() gcs_buffer.close = lambda: True @@ -43,6 +43,7 @@ def ls(self, path, **kwargs): return gcs_buffer +@td.skip_if_no("gcsfs") # Patches pyarrow; other processes should not pick up change @pytest.mark.single_cpu @pytest.mark.parametrize("format", ["csv", "json", "parquet", "excel", "markdown"]) @@ -52,7 +53,6 @@ def test_to_read_gcs(gcs_buffer, format, monkeypatch, capsys): GH 33987 """ - pytest.importorskip("gcsfs") df1 = DataFrame( { @@ -131,6 +131,7 @@ def assert_equal_zip_safe(result: bytes, expected: bytes, compression: str): assert result == expected +@td.skip_if_no("gcsfs") @pytest.mark.parametrize("encoding", ["utf-8", "cp1251"]) def test_to_csv_compression_encoding_gcs( gcs_buffer, compression_only, encoding, compression_to_extension @@ -141,7 +142,6 @@ def test_to_csv_compression_encoding_gcs( GH 35677 (to_csv, compression), GH 26124 (to_csv, encoding), and GH 32392 (read_csv, encoding) """ - pytest.importorskip("gcsfs") df = tm.makeDataFrame() # reference of compressed and encoded file @@ -178,9 +178,9 @@ def test_to_csv_compression_encoding_gcs( @td.skip_if_no("fastparquet") +@td.skip_if_no("gcsfs") def test_to_parquet_gcs_new_file(monkeypatch, tmpdir): """Regression test for writing to a not-yet-existent GCS Parquet file.""" - pytest.importorskip("gcsfs") from fsspec import AbstractFileSystem df1 = DataFrame( From c07178e6e129bce899d851e683884fd8c3aa4bf1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Thu, 29 Jun 2023 15:14:20 -0700 Subject: [PATCH 7/7] type ignore --- pandas/_testing/_io.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/_testing/_io.py b/pandas/_testing/_io.py index 8384974a6bc6f..b57f9f121e96f 100644 --- a/pandas/_testing/_io.py +++ b/pandas/_testing/_io.py @@ -82,8 +82,8 @@ def round_trip_pathlib(writer, reader, path: str | None = None): if path is None: path = "___pathlib___" with ensure_clean(path) as path: - writer(Path(path)) - obj = reader(Path(path)) + writer(Path(path)) # type: ignore[arg-type] + obj = reader(Path(path)) # type: ignore[arg-type] return obj