From 11186b0732abb0554d90da5162179c1b11eccdbe Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 7 Aug 2024 10:21:53 +0200 Subject: [PATCH 1/8] TST (string dtype): add test build with future strings enabled without pyarrow --- .github/actions/setup-conda/action.yml | 6 ++++++ .github/workflows/unit-tests.yml | 4 ++++ 2 files changed, 10 insertions(+) diff --git a/.github/actions/setup-conda/action.yml b/.github/actions/setup-conda/action.yml index ceeebfcd1c90c..d99d63c3debd0 100644 --- a/.github/actions/setup-conda/action.yml +++ b/.github/actions/setup-conda/action.yml @@ -14,3 +14,9 @@ runs: condarc-file: ci/.condarc cache-environment: true cache-downloads: true + + - name: Uninstall pyarrow + if: ${{ env.REMOVE_PYARROW == '1' }} + run: | + micromamba uninstall -y pyarrow + shell: bash -el {0} diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index a085d0265a1a5..a6a21388cb175 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -60,6 +60,9 @@ jobs: - name: "Future infer strings" env_file: actions-311.yaml pandas_future_infer_string: "1" + - name: "Future infer strings (without pyarrow)" + env_file: actions-311.yaml + pandas_future_infer_string: "1" - name: "Pypy" env_file: actions-pypy-39.yaml pattern: "not slow and not network and not single_cpu" @@ -85,6 +88,7 @@ jobs: NPY_PROMOTION_STATE: ${{ matrix.env_file == 'actions-311-numpydev.yaml' && 'weak' || 'legacy' }} # Clipboard tests QT_QPA_PLATFORM: offscreen + REMOVE_PYARROW: ${{ matrix.name == 'Future infer strings (without pyarrow)' && '1' || '0' }} concurrency: # https://github.community/t/concurrecy-not-work-for-push/183068/7 group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}} From 8df67959b2f1916fccd1d4a47fd0e7e73cf087c0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 7 Aug 2024 11:02:00 +0200 Subject: [PATCH 2/8] ensure the build doesn't override the default ones --- .github/workflows/unit-tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index a6a21388cb175..6b0ba1536fcb8 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -29,6 +29,7 @@ jobs: env_file: [actions-310.yaml, actions-311.yaml, actions-312.yaml] # Prevent the include jobs from overriding other jobs pattern: [""] + pandas_future_infer_string: ["0"] include: - name: "Downstream Compat" env_file: actions-311-downstream_compat.yaml From 4b932ae738587eef8fcf1ed3fe512409c1b224dc Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 7 Aug 2024 11:04:14 +0200 Subject: [PATCH 3/8] uninstall -> remove --- .github/actions/setup-conda/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/setup-conda/action.yml b/.github/actions/setup-conda/action.yml index d99d63c3debd0..3eb68bdd2a15c 100644 --- a/.github/actions/setup-conda/action.yml +++ b/.github/actions/setup-conda/action.yml @@ -18,5 +18,5 @@ runs: - name: Uninstall pyarrow if: ${{ env.REMOVE_PYARROW == '1' }} run: | - micromamba uninstall -y pyarrow + micromamba remove -y pyarrow shell: bash -el {0} From db237bb296e10cd49b1173aaa044e7370a2f84f4 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 7 Aug 2024 14:13:08 +0200 Subject: [PATCH 4/8] avoid jobs with same env being cancelled --- .github/workflows/unit-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 6b0ba1536fcb8..200e5d0f8f8a6 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -92,7 +92,7 @@ jobs: REMOVE_PYARROW: ${{ matrix.name == 'Future infer strings (without pyarrow)' && '1' || '0' }} concurrency: # https://github.community/t/concurrecy-not-work-for-push/183068/7 - group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}} + group: ${{ github.event_name == 'push' && github.run_number || github.ref }}-${{ matrix.env_file }}-${{ matrix.pattern }}-${{ matrix.extra_apt || '' }}-${{ matrix.pandas_future_infer_string }} cancel-in-progress: true services: From d414dc9a8a70299fb1eaf6eece8b962a0dcc9331 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 7 Aug 2024 14:14:20 +0200 Subject: [PATCH 5/8] use different python version for both future jobs --- .github/workflows/unit-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/unit-tests.yml b/.github/workflows/unit-tests.yml index 200e5d0f8f8a6..166c06acccc49 100644 --- a/.github/workflows/unit-tests.yml +++ b/.github/workflows/unit-tests.yml @@ -59,7 +59,7 @@ jobs: # It will be temporarily activated during tests with locale.setlocale extra_loc: "zh_CN" - name: "Future infer strings" - env_file: actions-311.yaml + env_file: actions-312.yaml pandas_future_infer_string: "1" - name: "Future infer strings (without pyarrow)" env_file: actions-311.yaml From 0b2bd61b2181794d3ee6ed0f713e9a44ada02a32 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Wed, 7 Aug 2024 19:59:35 +0200 Subject: [PATCH 6/8] add some xfails --- pandas/tests/apply/test_invalid_arg.py | 6 ++++++ pandas/tests/apply/test_numba.py | 1 + pandas/tests/arrays/boolean/test_arithmetic.py | 7 +++++++ pandas/tests/extension/base/ops.py | 5 +++++ pandas/tests/extension/test_categorical.py | 2 ++ pandas/tests/extension/test_numpy.py | 7 +++++++ pandas/tests/frame/indexing/test_where.py | 5 +++++ pandas/tests/frame/methods/test_info.py | 3 ++- pandas/tests/frame/methods/test_rank.py | 14 +++++++++++++- pandas/tests/frame/methods/test_value_counts.py | 7 +++++++ pandas/tests/frame/test_api.py | 6 +++++- pandas/tests/frame/test_arithmetic.py | 6 +++++- pandas/tests/frame/test_constructors.py | 3 ++- pandas/tests/frame/test_logical_ops.py | 7 +++++++ pandas/tests/frame/test_reductions.py | 11 ++++++++--- pandas/tests/frame/test_subclass.py | 4 ++++ pandas/tests/frame/test_unary.py | 3 ++- .../tests/groupby/methods/test_value_counts.py | 1 + pandas/tests/groupby/test_groupby.py | 2 ++ pandas/tests/groupby/test_groupby_dropna.py | 4 ++++ pandas/tests/indexes/multi/test_join.py | 3 +++ pandas/tests/indexes/object/test_indexing.py | 3 +++ pandas/tests/indexes/test_base.py | 5 +++++ pandas/tests/indexes/test_old_base.py | 1 + pandas/tests/indexing/test_loc.py | 4 ++++ pandas/tests/io/formats/style/test_bar.py | 1 + pandas/tests/io/parser/conftest.py | 11 ++++++++++- pandas/tests/series/test_api.py | 7 +++++++ pandas/tests/series/test_arithmetic.py | 4 ++++ pandas/tests/series/test_logical_ops.py | 8 ++++++++ pandas/tests/series/test_reductions.py | 13 +++++++++++++ pandas/tests/window/test_groupby.py | 16 ++++++++++++++++ pandas/tests/window/test_rolling.py | 6 ++++++ 33 files changed, 176 insertions(+), 10 deletions(-) diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index 3137d3ff50954..dcb313035665e 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -12,6 +12,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.errors import SpecificationError from pandas import ( @@ -209,6 +211,8 @@ def transform(row): data.apply(transform, axis=1) +# we should raise a proper TypeError instead of propagating the pyarrow error +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "df, func, expected", tm.get_cython_table_params( @@ -229,6 +233,8 @@ def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_str df.agg(func, axis=axis) +# we should raise a proper TypeError instead of propagating the pyarrow error +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "series, func, expected", chain( diff --git a/pandas/tests/apply/test_numba.py b/pandas/tests/apply/test_numba.py index 6ac0b49f0e4e7..6bbe5100e8826 100644 --- a/pandas/tests/apply/test_numba.py +++ b/pandas/tests/apply/test_numba.py @@ -104,6 +104,7 @@ def test_numba_nonunique_unsupported(apply_axis): def test_numba_unsupported_dtypes(apply_axis): + pytest.importorskip("pyarrow") f = lambda x: x df = DataFrame({"a": [1, 2], "b": ["a", "b"], "c": [4, 5]}) df["c"] = df["c"].astype("double[pyarrow]") diff --git a/pandas/tests/arrays/boolean/test_arithmetic.py b/pandas/tests/arrays/boolean/test_arithmetic.py index 0c4fcf149eb20..4dbd8eb9f5ca7 100644 --- a/pandas/tests/arrays/boolean/test_arithmetic.py +++ b/pandas/tests/arrays/boolean/test_arithmetic.py @@ -3,6 +3,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW + import pandas as pd import pandas._testing as tm @@ -90,6 +94,9 @@ def test_op_int8(left_array, right_array, opname): # ----------------------------------------------------------------------------- +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_error_invalid_values(data, all_arithmetic_operators, using_infer_string): # invalid ops diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index fad2560265d21..8df566890bcf1 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -140,6 +140,7 @@ class BaseArithmeticOpsTests(BaseOpsUtil): series_array_exc: type[Exception] | None = TypeError divmod_exc: type[Exception] | None = TypeError + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_series_with_scalar(self, data, all_arithmetic_operators): # series & scalar if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype): @@ -149,6 +150,7 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators): ser = pd.Series(data) self.check_opname(ser, op_name, ser.iloc[0]) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): # frame & scalar if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype): @@ -158,12 +160,14 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): df = pd.DataFrame({"A": data}) self.check_opname(df, op_name, data[0]) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_series_with_array(self, data, all_arithmetic_operators): # ndarray & other series op_name = all_arithmetic_operators ser = pd.Series(data) self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser))) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_divmod(self, data): ser = pd.Series(data) self._check_divmod_op(ser, divmod, 1) @@ -179,6 +183,7 @@ def test_divmod_series_array(self, data, data_for_twos): other = pd.Series(other) self._check_divmod_op(other, ops.rdivmod, ser) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_add_series_with_extension_array(self, data): # Check adding an ExtensionArray to a Series of the same dtype matches # the behavior of adding the arrays directly and then wrapping in a diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index 8f8af607585df..c3d4b83f731a3 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -140,6 +140,7 @@ def test_map(self, data, na_action): result = data.map(lambda x: x, na_action=na_action) tm.assert_extension_array_equal(result, data) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): # frame & scalar op_name = all_arithmetic_operators @@ -151,6 +152,7 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): ) super().test_arith_frame_with_scalar(data, op_name) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): op_name = all_arithmetic_operators if op_name == "__rmod__": diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 79cfb736941d6..1b251a5118681 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -19,6 +19,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.core.dtypes.dtypes import NumpyEADtype import pandas as pd @@ -255,6 +257,7 @@ def test_insert_invalid(self, data, invalid_scalar): frame_scalar_exc = None series_array_exc = None + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_divmod(self, data): divmod_exc = None if data.dtype.kind == "O": @@ -262,6 +265,7 @@ def test_divmod(self, data): self.divmod_exc = divmod_exc super().test_divmod(data) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_divmod_series_array(self, data): ser = pd.Series(data) exc = None @@ -270,6 +274,7 @@ def test_divmod_series_array(self, data): self.divmod_exc = exc self._check_divmod_op(ser, divmod, data) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request): opname = all_arithmetic_operators series_scalar_exc = None @@ -283,6 +288,7 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators, request) self.series_scalar_exc = series_scalar_exc super().test_arith_series_with_scalar(data, all_arithmetic_operators) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_series_with_array(self, data, all_arithmetic_operators): opname = all_arithmetic_operators series_array_exc = None @@ -291,6 +297,7 @@ def test_arith_series_with_array(self, data, all_arithmetic_operators): self.series_array_exc = series_array_exc super().test_arith_series_with_array(data, all_arithmetic_operators) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators, request): opname = all_arithmetic_operators frame_scalar_exc = None diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 1d7b3e12b2e86..6f387abf523b9 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -6,6 +6,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas.core.dtypes.common import is_scalar import pandas as pd @@ -1018,6 +1020,9 @@ def test_where_producing_ea_cond_for_np_dtype(): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize( "replacement", [0.001, True, "snake", None, datetime(2022, 5, 4)] ) diff --git a/pandas/tests/frame/methods/test_info.py b/pandas/tests/frame/methods/test_info.py index a4319f8a8ae7f..aad43b7a77ac7 100644 --- a/pandas/tests/frame/methods/test_info.py +++ b/pandas/tests/frame/methods/test_info.py @@ -10,6 +10,7 @@ from pandas._config import using_string_dtype from pandas.compat import ( + HAS_PYARROW, IS64, PYPY, ) @@ -520,7 +521,7 @@ def test_info_int_columns(): assert result == expected -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_memory_usage_empty_no_warning(): # GH#50066 df = DataFrame(index=["a", "b"]) diff --git a/pandas/tests/frame/methods/test_rank.py b/pandas/tests/frame/methods/test_rank.py index 79aabbcc83bbf..4e8e267523439 100644 --- a/pandas/tests/frame/methods/test_rank.py +++ b/pandas/tests/frame/methods/test_rank.py @@ -6,10 +6,13 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs.algos import ( Infinity, NegInfinity, ) +from pandas.compat import HAS_PYARROW from pandas import ( DataFrame, @@ -464,9 +467,18 @@ def test_rank_inf_nans_na_option( ], ) def test_rank_object_first( - self, frame_or_series, na_option, ascending, expected, using_infer_string + self, + request, + frame_or_series, + na_option, + ascending, + expected, + using_infer_string, ): obj = frame_or_series(["foo", "foo", None, "foo"]) + if using_string_dtype() and not HAS_PYARROW and isinstance(obj, Series): + request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)")) + result = obj.rank(method="first", na_option=na_option, ascending=ascending) expected = frame_or_series(expected) if using_infer_string and isinstance(obj, Series): diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py index 4136d641ef67f..ab79915704f92 100644 --- a/pandas/tests/frame/methods/test_value_counts.py +++ b/pandas/tests/frame/methods/test_value_counts.py @@ -1,6 +1,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW + import pandas as pd import pandas._testing as tm @@ -132,6 +136,9 @@ def test_data_frame_value_counts_dropna_true(nulls_fixture): tm.assert_series_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_data_frame_value_counts_dropna_false(nulls_fixture): # GH 41334 df = pd.DataFrame( diff --git a/pandas/tests/frame/test_api.py b/pandas/tests/frame/test_api.py index e8ef0592ac432..f8219e68a72da 100644 --- a/pandas/tests/frame/test_api.py +++ b/pandas/tests/frame/test_api.py @@ -8,6 +8,8 @@ from pandas._config import using_string_dtype from pandas._config.config import option_context +from pandas.compat import HAS_PYARROW + import pandas as pd from pandas import ( DataFrame, @@ -113,7 +115,9 @@ def test_not_hashable(self): with pytest.raises(TypeError, match=msg): hash(empty_frame) - @pytest.mark.xfail(using_string_dtype(), reason="surrogates not allowed") + @pytest.mark.xfail( + using_string_dtype() and HAS_PYARROW, reason="surrogates not allowed" + ) def test_column_name_contains_unicode_surrogate(self): # GH 25509 colname = "\ud83d" diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index 734bfc8b30053..e41a3b27e592c 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -13,6 +13,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + import pandas as pd from pandas import ( DataFrame, @@ -1542,7 +1544,9 @@ def test_comparisons(self, simple_frame, float_frame, func): with pytest.raises(ValueError, match=msg): func(simple_frame, simple_frame[:2]) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") + @pytest.mark.xfail( + using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)" + ) def test_strings_to_numbers_comparisons_raises(self, compare_operators_no_eq_ne): # GH 11565 df = DataFrame( diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index a210af94561f9..0176a36fe78d7 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -24,6 +24,7 @@ from pandas._config import using_string_dtype from pandas._libs import lib +from pandas.compat import HAS_PYARROW from pandas.compat.numpy import np_version_gt2 from pandas.errors import IntCastingNaNError @@ -299,7 +300,7 @@ def test_constructor_dtype_nocast_view_2d_array(self): df2 = DataFrame(df.values, dtype=df[0].dtype) assert df2._mgr.blocks[0].values.flags.c_contiguous - @pytest.mark.xfail(using_string_dtype(), reason="conversion copies") + @pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="conversion copies") def test_1d_object_array_does_not_copy(self): # https://github.com/pandas-dev/pandas/issues/39272 arr = np.array(["a", "b"], dtype="object") diff --git a/pandas/tests/frame/test_logical_ops.py b/pandas/tests/frame/test_logical_ops.py index ad54cfaf9d927..6788721e8a72e 100644 --- a/pandas/tests/frame/test_logical_ops.py +++ b/pandas/tests/frame/test_logical_ops.py @@ -4,6 +4,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW + from pandas import ( CategoricalIndex, DataFrame, @@ -96,6 +100,9 @@ def test_logical_ops_int_frame(self): res_ser = df1a_int["A"] | df1a_bool["A"] tm.assert_series_equal(res_ser, df1a_bool["A"]) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_logical_ops_invalid(self, using_infer_string): # GH#5808 diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 4c355ed92b6c3..1d667d35db253 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -226,6 +226,7 @@ def float_frame_with_na(): class TestDataFrameAnalytics: # --------------------------------------------------------------------- # Reductions + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("axis", [0, 1]) @pytest.mark.parametrize( "opname", @@ -431,6 +432,7 @@ def test_stat_operators_attempt_obj_array(self, method, df, axis): expected[expected.isna()] = None tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("op", ["mean", "std", "var", "skew", "kurt", "sem"]) def test_mixed_ops(self, op): # GH#16116 @@ -532,7 +534,7 @@ def test_mean_mixed_string_decimal(self): df = DataFrame(d) with pytest.raises( - TypeError, match="unsupported operand type|does not support" + TypeError, match="unsupported operand type|does not support|Cannot perform" ): df.mean() result = df[["A", "C"]].mean() @@ -690,6 +692,7 @@ def test_mode_dropna(self, dropna, expected): expected = DataFrame(expected) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_mode_sortwarning(self, using_infer_string): # Check for the warning that is raised when the mode # results cannot be sorted @@ -979,7 +982,7 @@ def test_sum_mixed_datetime(self): def test_mean_corner(self, float_frame, float_string_frame): # unit test when have object data - msg = "Could not convert|does not support" + msg = "Could not convert|does not support|Cannot perform" with pytest.raises(TypeError, match=msg): float_string_frame.mean(axis=0) @@ -1093,6 +1096,7 @@ def test_idxmin_empty(self, index, skipna, axis): expected = Series(dtype=index.dtype) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("numeric_only", [True, False]) def test_idxmin_numeric_only(self, numeric_only): df = DataFrame({"a": [2, 3, 1], "b": [2, 1, 1], "c": list("xyx")}) @@ -1143,6 +1147,7 @@ def test_idxmax_empty(self, index, skipna, axis): expected = Series(dtype=index.dtype) tm.assert_series_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("numeric_only", [True, False]) def test_idxmax_numeric_only(self, numeric_only): df = DataFrame({"a": [2, 3, 1], "b": [2, 1, 1], "c": list("xyx")}) @@ -1964,7 +1969,7 @@ def test_minmax_extensionarray(method, numeric_only): def test_frame_mixed_numeric_object_with_timestamp(ts_value): # GH 13912 df = DataFrame({"a": [1], "b": [1.1], "c": ["foo"], "d": [ts_value]}) - with pytest.raises(TypeError, match="does not support operation"): + with pytest.raises(TypeError, match="does not support operation|Cannot perform"): df.sum() diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index 7d18ef28a722d..a70d67bb2ada9 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + import pandas as pd from pandas import ( DataFrame, @@ -147,6 +149,7 @@ def nonexistence(self): with pytest.raises(AttributeError, match=".*i_dont_exist.*"): A().nonexistence + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_subclass_align(self): # GH 12983 df1 = tm.SubclassedDataFrame( @@ -176,6 +179,7 @@ def test_subclass_align(self): assert isinstance(res2, tm.SubclassedSeries) tm.assert_series_equal(res2, exp2.c) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_subclass_align_combinations(self): # GH 12983 df = tm.SubclassedDataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE")) diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py index 1887fa61ad081..bc8016a6a4e23 100644 --- a/pandas/tests/frame/test_unary.py +++ b/pandas/tests/frame/test_unary.py @@ -42,6 +42,7 @@ def test_neg_object(self, df, expected): tm.assert_frame_equal(-df, expected) tm.assert_series_equal(-df["a"], expected["a"]) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "df_data", [ @@ -130,7 +131,7 @@ def test_pos_object(self, df_data): tm.assert_frame_equal(+df, df) tm.assert_series_equal(+df["a"], df["a"]) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.filterwarnings("ignore:Applying:DeprecationWarning") def test_pos_object_raises(self): # GH#21380 diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index 14d3dbd6fa496..6c5a36b11be25 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -500,6 +500,7 @@ def test_dropna_combinations( tm.assert_series_equal(result, expected) +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "dropna, expected_data, expected_index", [ diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 791f279bffc94..1f2b8ccd4d660 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1407,6 +1407,8 @@ def g(group): tm.assert_series_equal(result, expected) +# TODO harmonize error messages +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("grouper", ["A", ["A", "B"]]) def test_set_group_name(df, grouper, using_infer_string): def f(group): diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index d42aa06d6bbfe..ce0af60c57c44 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -11,6 +11,10 @@ import pandas._testing as tm from pandas.tests.groupby import get_groupby_method_args +pytestmark = pytest.mark.xfail( + using_string_dtype(), reason="TODO(infer_string)", strict=False +) + @pytest.mark.parametrize( "dropna, tuples, outputs", diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index 2be6bba475af7..ec6f5aaf4b6f1 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas import ( DataFrame, Index, @@ -12,6 +14,7 @@ import pandas._testing as tm +@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("other", [["three", "one", "two"], ["one"], ["one", "three"]]) def test_join_level(idx, other, join_type): other = Index(other) diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py index 1eeeebd6b8ca9..e5756ec6ccd9f 100644 --- a/pandas/tests/indexes/object/test_indexing.py +++ b/pandas/tests/indexes/object/test_indexing.py @@ -3,6 +3,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas._libs.missing import ( NA, is_matching_na, @@ -29,6 +31,7 @@ def test_get_indexer_strings(self, method, expected): tm.assert_numpy_array_equal(actual, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_get_indexer_strings_raises(self, using_infer_string): index = Index(["b", "c"]) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 0911f2aec74d6..cf5bcf31250a0 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -8,6 +8,8 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import IS64 from pandas.errors import InvalidIndexError import pandas.util._test_decorators as td @@ -71,6 +73,7 @@ def test_constructor_casting(self, index): tm.assert_contains_all(arr, new_index) tm.assert_index_equal(index, new_index) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_constructor_copy(self, using_infer_string): index = Index(list("abc"), name="name") arr = np.array(index) @@ -335,6 +338,7 @@ def test_constructor_empty_special(self, empty, klass): def test_view_with_args(self, index): index.view("i8") + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize( "index", [ @@ -817,6 +821,7 @@ def test_isin(self, values, index, expected): expected = np.array(expected, dtype=bool) tm.assert_numpy_array_equal(result, expected) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_isin_nan_common_object( self, nulls_fixture, nulls_fixture2, using_infer_string ): diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index 6d01ba6adc87a..a41d50cfaa48d 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -245,6 +245,7 @@ def test_repr_max_seq_item_setting(self, simple_index): repr(idx) assert "..." not in str(idx) + @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") def test_ensure_copied_data(self, index): # Check the "copy" argument of each Index.__new__ is honoured diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 247501f1504e7..e007b8c4e97ac 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -16,6 +16,7 @@ from pandas._config import using_string_dtype from pandas._libs import index as libindex +from pandas.compat import HAS_PYARROW from pandas.errors import IndexingError import pandas as pd @@ -1388,6 +1389,9 @@ def test_loc_setitem_categorical_values_partial_column_slice(self): df.loc[1:2, "a"] = Categorical(["b", "b"], categories=["a", "b"]) df.loc[2:3, "b"] = Categorical(["b", "b"], categories=["a", "b"]) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_loc_setitem_single_row_categorical(self, using_infer_string): # GH#25495 df = DataFrame({"Alpha": ["a"], "Numeric": [0]}) diff --git a/pandas/tests/io/formats/style/test_bar.py b/pandas/tests/io/formats/style/test_bar.py index b0e4712e8bb3d..d28c7c566d851 100644 --- a/pandas/tests/io/formats/style/test_bar.py +++ b/pandas/tests/io/formats/style/test_bar.py @@ -347,6 +347,7 @@ def test_styler_bar_with_NA_values(): def test_style_bar_with_pyarrow_NA_values(): + pytest.importorskip("pyarrow") data = """name,age,test1,test2,teacher Adam,15,95.0,80,Ashby Bob,16,81.0,82,Ashby diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index 6d5f870f07206..02332a70582af 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -4,6 +4,7 @@ import pytest +from pandas.compat import HAS_PYARROW from pandas.compat._optional import VERSIONS from pandas import ( @@ -117,7 +118,15 @@ def csv1(datapath): _py_parsers_only = [_pythonParser] _c_parsers_only = [_cParserHighMemory, _cParserLowMemory] -_pyarrow_parsers_only = [pytest.param(_pyarrowParser, marks=pytest.mark.single_cpu)] +_pyarrow_parsers_only = [ + pytest.param( + _pyarrowParser, + marks=[ + pytest.mark.single_cpu, + pytest.mark.skipif(not HAS_PYARROW, reason="pyarrow is not installed"), + ], + ) +] _all_parsers = [*_c_parsers_only, *_py_parsers_only, *_pyarrow_parsers_only] diff --git a/pandas/tests/series/test_api.py b/pandas/tests/series/test_api.py index a63ffbbd3a5a1..79a55eb357f87 100644 --- a/pandas/tests/series/test_api.py +++ b/pandas/tests/series/test_api.py @@ -4,6 +4,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW + import pandas as pd from pandas import ( DataFrame, @@ -160,6 +164,9 @@ def test_attrs(self): result = s + 1 assert result.attrs == {"version": 1} + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_inspect_getmembers(self): # GH38782 pytest.importorskip("jinja2") diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index ff84b5c52183b..ed56a5c1ee855 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -13,6 +13,7 @@ from pandas._libs import lib from pandas._libs.tslibs import IncompatibleFrequency +from pandas.compat import HAS_PYARROW import pandas as pd from pandas import ( @@ -193,6 +194,9 @@ def test_string_addition(self, target_add, input_value, expected_value): expected = Series(expected_value) tm.assert_series_equal(result, expected) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_divmod(self): # GH#25557 a = Series([1, 1, 1, np.nan], index=["a", "b", "c", "d"]) diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index 939bf888fd61b..94934e6c770a1 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -6,6 +6,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas import ( DataFrame, Index, @@ -143,6 +145,9 @@ def test_logical_operators_int_dtype_with_bool(self): expected = Series([False, True, True, True]) tm.assert_series_equal(result, expected) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_logical_operators_int_dtype_with_object(self, using_infer_string): # GH#9016: support bitwise op for integer types s_0123 = Series(range(4), dtype="int64") @@ -462,6 +467,9 @@ def test_logical_ops_label_based(self, using_infer_string): with pytest.raises(TypeError, match=msg): t & v + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_logical_ops_df_compat(self): # GH#1134 s1 = Series([True, False, True], index=list("ABC"), name="x") diff --git a/pandas/tests/series/test_reductions.py b/pandas/tests/series/test_reductions.py index 0bc3092d30b43..7bbb902e14a36 100644 --- a/pandas/tests/series/test_reductions.py +++ b/pandas/tests/series/test_reductions.py @@ -1,6 +1,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW + import pandas as pd from pandas import Series import pandas._testing as tm @@ -162,6 +166,9 @@ def test_validate_stat_keepdims(): np.sum(ser, keepdims=True) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_mean_with_convertible_string_raises(using_infer_string): # GH#44008 ser = Series(["1", "2"]) @@ -181,6 +188,9 @@ def test_mean_with_convertible_string_raises(using_infer_string): df.mean() +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_mean_dont_convert_j_to_complex(): # GH#36703 df = pd.DataFrame([{"db": "J", "numeric": 123}]) @@ -199,6 +209,9 @@ def test_mean_dont_convert_j_to_complex(): np.mean(df["db"].astype("string").array) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_median_with_convertible_string_raises(): # GH#34671 this _could_ return a string "2", but definitely not float 2.0 msg = r"Cannot convert \['1' '2' '3'\] to numeric|does not support" diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 4d37c6d57f788..bbae4797797d9 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -1,6 +1,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW + from pandas import ( DataFrame, DatetimeIndex, @@ -171,6 +175,9 @@ def test_rolling_corr_cov_other_same_size_as_groups(self, f, expected_val): ) tm.assert_frame_equal(result, expected) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) @pytest.mark.parametrize("f", ["corr", "cov"]) def test_rolling_corr_cov_other_diff_size_as_groups(self, f, roll_frame): g = roll_frame.groupby("A") @@ -1084,6 +1091,9 @@ def test_expanding_quantile(self, interpolation, frame): expected.index = expected_index tm.assert_frame_equal(result, expected) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) @pytest.mark.parametrize("f", ["corr", "cov"]) def test_expanding_corr_cov(self, f, frame): g = frame.groupby("A") @@ -1277,6 +1287,9 @@ def test_dont_mutate_obj_after_slicing(self): tm.assert_frame_equal(result, expected_df) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_rolling_corr_with_single_integer_in_index(): # GH 44078 df = DataFrame({"a": [(1,), (1,), (1,)], "b": [4, 5, 6]}) @@ -1289,6 +1302,9 @@ def test_rolling_corr_with_single_integer_in_index(): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_rolling_corr_with_tuples_in_index(): # GH 44078 df = DataFrame( diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index af3194b5085c4..17b92427f0d5d 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -6,7 +6,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + from pandas.compat import ( + HAS_PYARROW, IS64, is_platform_arm, is_platform_power, @@ -1326,6 +1329,9 @@ def test_rolling_corr_timedelta_index(index, window): tm.assert_almost_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_groupby_rolling_nan_included(): # GH 35542 data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} From 26a3db1531313dd8f95c7fafee36e38a1fa1c00a Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 9 Aug 2024 09:06:38 +0200 Subject: [PATCH 7/8] fixup xfails --- pandas/tests/apply/test_frame_apply.py | 5 ++++ pandas/tests/apply/test_invalid_arg.py | 9 +++++-- pandas/tests/arithmetic/test_object.py | 6 +++++ .../arrays/categorical/test_analytics.py | 9 +++++-- .../arrays/categorical/test_constructors.py | 6 ++++- pandas/tests/arrays/integer/test_reduction.py | 9 ++++--- pandas/tests/base/test_conversion.py | 12 ++++++++- pandas/tests/copy_view/test_astype.py | 5 ++-- pandas/tests/copy_view/test_functions.py | 12 +++++---- pandas/tests/copy_view/test_interp_fillna.py | 4 ++- pandas/tests/copy_view/test_methods.py | 9 +++++-- pandas/tests/copy_view/test_replace.py | 6 +++-- pandas/tests/extension/base/ops.py | 27 ++++++++++++++++--- pandas/tests/frame/indexing/test_indexing.py | 5 +++- pandas/tests/frame/indexing/test_where.py | 3 +++ .../tests/frame/methods/test_value_counts.py | 7 ----- pandas/tests/frame/test_subclass.py | 4 --- pandas/tests/frame/test_unary.py | 11 ++++++-- .../groupby/methods/test_value_counts.py | 5 +++- pandas/tests/groupby/test_groupby.py | 5 +++- pandas/tests/groupby/test_groupby_dropna.py | 23 +++++++++++++--- pandas/tests/indexes/multi/test_join.py | 3 --- pandas/tests/indexes/object/test_indexing.py | 5 +++- pandas/tests/indexes/test_base.py | 21 ++++++++++++--- pandas/tests/indexes/test_old_base.py | 7 ++++- pandas/tests/io/parser/conftest.py | 11 +++++++- pandas/tests/reductions/test_reductions.py | 5 ++++ pandas/tests/series/indexing/test_setitem.py | 4 +++ pandas/tests/series/test_arithmetic.py | 4 --- pandas/tests/series/test_logical_ops.py | 3 --- pandas/tests/window/test_groupby.py | 16 ----------- 31 files changed, 183 insertions(+), 78 deletions(-) diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index b0475b64a844e..3be3562d23cd6 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -6,6 +6,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas.core.dtypes.dtypes import CategoricalDtype import pandas as pd @@ -1245,6 +1247,9 @@ def test_agg_multiple_mixed(): tm.assert_frame_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_agg_multiple_mixed_raises(): # GH 20909 mdf = DataFrame( diff --git a/pandas/tests/apply/test_invalid_arg.py b/pandas/tests/apply/test_invalid_arg.py index dcb313035665e..ba970e328ae40 100644 --- a/pandas/tests/apply/test_invalid_arg.py +++ b/pandas/tests/apply/test_invalid_arg.py @@ -14,6 +14,7 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW from pandas.errors import SpecificationError from pandas import ( @@ -212,7 +213,9 @@ def transform(row): # we should raise a proper TypeError instead of propagating the pyarrow error -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) @pytest.mark.parametrize( "df, func, expected", tm.get_cython_table_params( @@ -234,7 +237,9 @@ def test_agg_cython_table_raises_frame(df, func, expected, axis, using_infer_str # we should raise a proper TypeError instead of propagating the pyarrow error -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) @pytest.mark.parametrize( "series, func, expected", chain( diff --git a/pandas/tests/arithmetic/test_object.py b/pandas/tests/arithmetic/test_object.py index 4b5156d0007bb..899ea1910d055 100644 --- a/pandas/tests/arithmetic/test_object.py +++ b/pandas/tests/arithmetic/test_object.py @@ -8,6 +8,9 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW import pandas.util._test_decorators as td import pandas as pd @@ -315,6 +318,9 @@ def test_add(self): expected = pd.Index(["1a", "1b", "1c"]) tm.assert_index_equal("1" + index, expected) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_sub_fail(self, using_infer_string): index = pd.Index([str(i) for i in range(10)]) diff --git a/pandas/tests/arrays/categorical/test_analytics.py b/pandas/tests/arrays/categorical/test_analytics.py index dca33dffa3996..52fd80cd196e0 100644 --- a/pandas/tests/arrays/categorical/test_analytics.py +++ b/pandas/tests/arrays/categorical/test_analytics.py @@ -6,7 +6,10 @@ from pandas._config import using_string_dtype -from pandas.compat import PYPY +from pandas.compat import ( + HAS_PYARROW, + PYPY, +) from pandas import ( Categorical, @@ -296,7 +299,9 @@ def test_nbytes(self): exp = 3 + 3 * 8 # 3 int8s for values + 3 int64s for categories assert cat.nbytes == exp - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") + @pytest.mark.xfail( + using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)" + ) def test_memory_usage(self): cat = Categorical([1, 2, 3]) diff --git a/pandas/tests/arrays/categorical/test_constructors.py b/pandas/tests/arrays/categorical/test_constructors.py index 6752a503016f8..d7eb6800e5d07 100644 --- a/pandas/tests/arrays/categorical/test_constructors.py +++ b/pandas/tests/arrays/categorical/test_constructors.py @@ -8,6 +8,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas.core.dtypes.common import ( is_float_dtype, is_integer_dtype, @@ -442,7 +444,9 @@ def test_constructor_str_unknown(self): with pytest.raises(ValueError, match="Unknown dtype"): Categorical([1, 2], dtype="foo") - @pytest.mark.xfail(using_string_dtype(), reason="Can't be NumPy strings") + @pytest.mark.xfail( + using_string_dtype() and HAS_PYARROW, reason="Can't be NumPy strings" + ) def test_constructor_np_strs(self): # GH#31499 Hashtable.map_locations needs to work on np.str_ objects cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")]) diff --git a/pandas/tests/arrays/integer/test_reduction.py b/pandas/tests/arrays/integer/test_reduction.py index db04862e4ea07..e485c7f79b475 100644 --- a/pandas/tests/arrays/integer/test_reduction.py +++ b/pandas/tests/arrays/integer/test_reduction.py @@ -1,6 +1,8 @@ import numpy as np import pytest +from pandas.compat import HAS_PYARROW + import pandas as pd from pandas import ( DataFrame, @@ -102,9 +104,10 @@ def test_groupby_reductions(op, expected): ["all", Series([True, True, True], index=["A", "B", "C"], dtype="boolean")], ], ) -def test_mixed_reductions(op, expected, using_infer_string): - if op in ["any", "all"] and using_infer_string: - expected = expected.astype("bool") +def test_mixed_reductions(request, op, expected, using_infer_string): + if op in ["any", "all"] and using_infer_string and HAS_PYARROW: + # TODO(infer_string) inconsistent result type + request.applymarker(pytest.mark.xfail(reason="TODO(infer_string)")) df = DataFrame( { "A": ["a", "b", "b"], diff --git a/pandas/tests/base/test_conversion.py b/pandas/tests/base/test_conversion.py index dd6bf3c7521f8..13a3ff048c79e 100644 --- a/pandas/tests/base/test_conversion.py +++ b/pandas/tests/base/test_conversion.py @@ -1,6 +1,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW + from pandas.core.dtypes.dtypes import DatetimeTZDtype import pandas as pd @@ -20,6 +24,7 @@ SparseArray, TimedeltaArray, ) +from pandas.core.arrays.string_ import StringArrayNumpySemantics from pandas.core.arrays.string_arrow import ArrowStringArrayNumpySemantics @@ -218,7 +223,9 @@ def test_iter_box_period(self): ) def test_values_consistent(arr, expected_type, dtype, using_infer_string): if using_infer_string and dtype == "object": - expected_type = ArrowStringArrayNumpySemantics + expected_type = ( + ArrowStringArrayNumpySemantics if HAS_PYARROW else StringArrayNumpySemantics + ) l_values = Series(arr)._values r_values = pd.Index(arr)._values assert type(l_values) is expected_type @@ -355,6 +362,9 @@ def test_to_numpy(arr, expected, index_or_series_or_array, request): tm.assert_numpy_array_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize("as_series", [True, False]) @pytest.mark.parametrize( "arr", [np.array([1, 2, 3], dtype="int64"), np.array(["a", "b", "c"], dtype=object)] diff --git a/pandas/tests/copy_view/test_astype.py b/pandas/tests/copy_view/test_astype.py index 8724f62de1534..de56d5e4a07ee 100644 --- a/pandas/tests/copy_view/test_astype.py +++ b/pandas/tests/copy_view/test_astype.py @@ -5,6 +5,7 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW from pandas.compat.pyarrow import pa_version_under12p0 import pandas.util._test_decorators as td @@ -197,7 +198,7 @@ def test_astype_arrow_timestamp(): assert np.shares_memory(get_array(df, "a"), get_array(result, "a")._pa_array) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_convert_dtypes_infer_objects(): ser = Series(["a", "b", "c"]) ser_orig = ser.copy() @@ -213,7 +214,7 @@ def test_convert_dtypes_infer_objects(): tm.assert_series_equal(ser, ser_orig) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_convert_dtypes(): df = DataFrame({"a": ["a", "b"], "b": [1, 2], "c": [1.5, 2.5], "d": [True, False]}) df_orig = df.copy() diff --git a/pandas/tests/copy_view/test_functions.py b/pandas/tests/copy_view/test_functions.py index d2e2d43b0a42b..dd4dd154f74b0 100644 --- a/pandas/tests/copy_view/test_functions.py +++ b/pandas/tests/copy_view/test_functions.py @@ -3,6 +3,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas import ( DataFrame, Index, @@ -14,7 +16,7 @@ from pandas.tests.copy_view.util import get_array -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_concat_frames(): df = DataFrame({"b": ["a"] * 3}) df2 = DataFrame({"a": ["a"] * 3}) @@ -33,7 +35,7 @@ def test_concat_frames(): tm.assert_frame_equal(df, df_orig) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_concat_frames_updating_input(): df = DataFrame({"b": ["a"] * 3}) df2 = DataFrame({"a": ["a"] * 3}) @@ -153,7 +155,7 @@ def test_concat_copy_keyword(): assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") @pytest.mark.parametrize( "func", [ @@ -249,7 +251,7 @@ def test_merge_copy_keyword(): assert np.shares_memory(get_array(df2, "b"), get_array(result, "b")) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_join_on_key(): df_index = Index(["a", "b", "c"], name="key") @@ -277,7 +279,7 @@ def test_join_on_key(): tm.assert_frame_equal(df2, df2_orig) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_join_multiple_dataframes_on_key(): df_index = Index(["a", "b", "c"], name="key") diff --git a/pandas/tests/copy_view/test_interp_fillna.py b/pandas/tests/copy_view/test_interp_fillna.py index f80e9b7dcf838..fc57178b897b9 100644 --- a/pandas/tests/copy_view/test_interp_fillna.py +++ b/pandas/tests/copy_view/test_interp_fillna.py @@ -3,6 +3,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas import ( NA, DataFrame, @@ -121,7 +123,7 @@ def test_interpolate_cannot_with_object_dtype(): df.interpolate() -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_interpolate_object_convert_no_op(): df = DataFrame({"a": ["a", "b", "c"], "b": 1}) arr_a = get_array(df, "a") diff --git a/pandas/tests/copy_view/test_methods.py b/pandas/tests/copy_view/test_methods.py index 3716df8fbf855..92e1ba750fae2 100644 --- a/pandas/tests/copy_view/test_methods.py +++ b/pandas/tests/copy_view/test_methods.py @@ -3,6 +3,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + import pandas as pd from pandas import ( DataFrame, @@ -714,7 +716,7 @@ def test_head_tail(method): tm.assert_frame_equal(df, df_orig) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_infer_objects(): df = DataFrame({"a": [1, 2], "b": "c", "c": 1, "d": "x"}) df_orig = df.copy() @@ -730,6 +732,9 @@ def test_infer_objects(): tm.assert_frame_equal(df, df_orig) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_infer_objects_no_reference(): df = DataFrame( { @@ -899,7 +904,7 @@ def test_sort_values_inplace(obj, kwargs): tm.assert_equal(view, obj_orig) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") @pytest.mark.parametrize("decimals", [-1, 0, 1]) def test_round(decimals): df = DataFrame({"a": [1, 2], "b": "c"}) diff --git a/pandas/tests/copy_view/test_replace.py b/pandas/tests/copy_view/test_replace.py index c1120ccfea635..58c979fb05089 100644 --- a/pandas/tests/copy_view/test_replace.py +++ b/pandas/tests/copy_view/test_replace.py @@ -3,6 +3,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas import ( Categorical, DataFrame, @@ -59,7 +61,7 @@ def test_replace_regex_inplace_refs(): tm.assert_frame_equal(view, df_orig) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") def test_replace_regex_inplace(): df = DataFrame({"a": ["aaa", "bbb"]}) arr = get_array(df, "a") @@ -257,7 +259,7 @@ def test_replace_empty_list(): assert not df2._mgr._has_no_reference(0) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") +@pytest.mark.xfail(using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)") @pytest.mark.parametrize("value", ["d", None]) def test_replace_object_list_inplace(value): df = DataFrame({"a": ["a", "b", "c"]}) diff --git a/pandas/tests/extension/base/ops.py b/pandas/tests/extension/base/ops.py index 8df566890bcf1..ff9f3cbed64a2 100644 --- a/pandas/tests/extension/base/ops.py +++ b/pandas/tests/extension/base/ops.py @@ -7,6 +7,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + from pandas.core.dtypes.common import is_string_dtype import pandas as pd @@ -140,7 +142,12 @@ class BaseArithmeticOpsTests(BaseOpsUtil): series_array_exc: type[Exception] | None = TypeError divmod_exc: type[Exception] | None = TypeError - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) + # TODO(infer_string) need to remove import of pyarrow + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) def test_arith_series_with_scalar(self, data, all_arithmetic_operators): # series & scalar if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype): @@ -150,7 +157,11 @@ def test_arith_series_with_scalar(self, data, all_arithmetic_operators): ser = pd.Series(data) self.check_opname(ser, op_name, ser.iloc[0]) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): # frame & scalar if all_arithmetic_operators == "__rmod__" and is_string_dtype(data.dtype): @@ -160,14 +171,22 @@ def test_arith_frame_with_scalar(self, data, all_arithmetic_operators): df = pd.DataFrame({"A": data}) self.check_opname(df, op_name, data[0]) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) def test_arith_series_with_array(self, data, all_arithmetic_operators): # ndarray & other series op_name = all_arithmetic_operators ser = pd.Series(data) self.check_opname(ser, op_name, pd.Series([ser.iloc[0]] * len(ser))) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) def test_divmod(self, data): ser = pd.Series(data) self._check_divmod_op(ser, divmod, 1) diff --git a/pandas/tests/frame/indexing/test_indexing.py b/pandas/tests/frame/indexing/test_indexing.py index 826ac2be3339b..8ce4e8725d632 100644 --- a/pandas/tests/frame/indexing/test_indexing.py +++ b/pandas/tests/frame/indexing/test_indexing.py @@ -12,6 +12,7 @@ from pandas._config import using_string_dtype from pandas._libs import iNaT +from pandas.compat import HAS_PYARROW from pandas.errors import InvalidIndexError from pandas.core.dtypes.common import is_integer @@ -1148,7 +1149,9 @@ def test_loc_setitem_datetimelike_with_inference(self): ) tm.assert_series_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)") + @pytest.mark.xfail( + using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)" + ) def test_getitem_boolean_indexing_mixed(self): df = DataFrame( { diff --git a/pandas/tests/frame/indexing/test_where.py b/pandas/tests/frame/indexing/test_where.py index 6f387abf523b9..32a827c25c77a 100644 --- a/pandas/tests/frame/indexing/test_where.py +++ b/pandas/tests/frame/indexing/test_where.py @@ -940,6 +940,9 @@ def test_where_nullable_invalid_na(frame_or_series, any_numeric_ea_dtype): obj.mask(mask, null) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) @given(data=OPTIONAL_ONE_OF_ALL) def test_where_inplace_casting(data): # GH 22051 diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py index ab79915704f92..4136d641ef67f 100644 --- a/pandas/tests/frame/methods/test_value_counts.py +++ b/pandas/tests/frame/methods/test_value_counts.py @@ -1,10 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - -from pandas.compat import HAS_PYARROW - import pandas as pd import pandas._testing as tm @@ -136,9 +132,6 @@ def test_data_frame_value_counts_dropna_true(nulls_fixture): tm.assert_series_equal(result, expected) -@pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" -) def test_data_frame_value_counts_dropna_false(nulls_fixture): # GH 41334 df = pd.DataFrame( diff --git a/pandas/tests/frame/test_subclass.py b/pandas/tests/frame/test_subclass.py index a70d67bb2ada9..7d18ef28a722d 100644 --- a/pandas/tests/frame/test_subclass.py +++ b/pandas/tests/frame/test_subclass.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - import pandas as pd from pandas import ( DataFrame, @@ -149,7 +147,6 @@ def nonexistence(self): with pytest.raises(AttributeError, match=".*i_dont_exist.*"): A().nonexistence - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_subclass_align(self): # GH 12983 df1 = tm.SubclassedDataFrame( @@ -179,7 +176,6 @@ def test_subclass_align(self): assert isinstance(res2, tm.SubclassedSeries) tm.assert_series_equal(res2, exp2.c) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) def test_subclass_align_combinations(self): # GH 12983 df = tm.SubclassedDataFrame({"a": [1, 3, 5], "b": [1, 3, 5]}, index=list("ACE")) diff --git a/pandas/tests/frame/test_unary.py b/pandas/tests/frame/test_unary.py index bc8016a6a4e23..5bbe047078c6e 100644 --- a/pandas/tests/frame/test_unary.py +++ b/pandas/tests/frame/test_unary.py @@ -5,6 +5,7 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW from pandas.compat.numpy import np_version_gte1p25 import pandas as pd @@ -42,7 +43,11 @@ def test_neg_object(self, df, expected): tm.assert_frame_equal(-df, expected) tm.assert_series_equal(-df["a"], expected["a"]) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) @pytest.mark.parametrize( "df_data", [ @@ -131,7 +136,9 @@ def test_pos_object(self, df_data): tm.assert_frame_equal(+df, df) tm.assert_series_equal(+df["a"], df["a"]) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) + @pytest.mark.xfail( + using_string_dtype() and HAS_PYARROW, reason="TODO(infer_string)" + ) @pytest.mark.filterwarnings("ignore:Applying:DeprecationWarning") def test_pos_object_raises(self): # GH#21380 diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index 6c5a36b11be25..18802ebd002fc 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -9,6 +9,7 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW import pandas.util._test_decorators as td from pandas import ( @@ -500,7 +501,9 @@ def test_dropna_combinations( tm.assert_series_equal(result, expected) -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize( "dropna, expected_data, expected_index", [ diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 1f2b8ccd4d660..11b874d0b1608 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -8,6 +8,7 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW from pandas.errors import SpecificationError import pandas.util._test_decorators as td @@ -1408,7 +1409,9 @@ def g(group): # TODO harmonize error messages -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize("grouper", ["A", ["A", "B"]]) def test_set_group_name(df, grouper, using_infer_string): def f(group): diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index ce0af60c57c44..02071acf378dd 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -3,6 +3,7 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW from pandas.compat.pyarrow import pa_version_under10p1 from pandas.core.dtypes.missing import na_value_for_dtype @@ -11,11 +12,10 @@ import pandas._testing as tm from pandas.tests.groupby import get_groupby_method_args -pytestmark = pytest.mark.xfail( - using_string_dtype(), reason="TODO(infer_string)", strict=False -) - +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize( "dropna, tuples, outputs", [ @@ -59,6 +59,9 @@ def test_groupby_dropna_multi_index_dataframe_nan_in_one_group( tm.assert_frame_equal(grouped, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize( "dropna, tuples, outputs", [ @@ -135,6 +138,9 @@ def test_groupby_dropna_normal_index_dataframe(dropna, idx, outputs): tm.assert_frame_equal(grouped, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize( "dropna, idx, expected", [ @@ -209,6 +215,9 @@ def test_groupby_dataframe_slice_then_transform(dropna, index): tm.assert_series_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize( "dropna, tuples, outputs", [ @@ -290,6 +299,9 @@ def test_groupby_dropna_datetime_like_data( tm.assert_frame_equal(grouped, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) @pytest.mark.parametrize( "dropna, data, selected_data, levels", [ @@ -375,6 +387,9 @@ def test_groupby_dropna_with_multiindex_input(input_index, keys, series): tm.assert_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" +) def test_groupby_nan_included(): # GH 35646 data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} diff --git a/pandas/tests/indexes/multi/test_join.py b/pandas/tests/indexes/multi/test_join.py index ec6f5aaf4b6f1..2be6bba475af7 100644 --- a/pandas/tests/indexes/multi/test_join.py +++ b/pandas/tests/indexes/multi/test_join.py @@ -1,8 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - from pandas import ( DataFrame, Index, @@ -14,7 +12,6 @@ import pandas._testing as tm -@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) @pytest.mark.parametrize("other", [["three", "one", "two"], ["one"], ["one", "three"]]) def test_join_level(idx, other, join_type): other = Index(other) diff --git a/pandas/tests/indexes/object/test_indexing.py b/pandas/tests/indexes/object/test_indexing.py index e5756ec6ccd9f..e3428d1060dbe 100644 --- a/pandas/tests/indexes/object/test_indexing.py +++ b/pandas/tests/indexes/object/test_indexing.py @@ -9,6 +9,7 @@ NA, is_matching_na, ) +from pandas.compat import HAS_PYARROW import pandas.util._test_decorators as td import pandas as pd @@ -31,7 +32,9 @@ def test_get_indexer_strings(self, method, expected): tm.assert_numpy_array_equal(actual, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_get_indexer_strings_raises(self, using_infer_string): index = Index(["b", "c"]) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index cf5bcf31250a0..7ec66100b7291 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -10,7 +10,10 @@ from pandas._config import using_string_dtype -from pandas.compat import IS64 +from pandas.compat import ( + HAS_PYARROW, + IS64, +) from pandas.errors import InvalidIndexError import pandas.util._test_decorators as td @@ -73,7 +76,9 @@ def test_constructor_casting(self, index): tm.assert_contains_all(arr, new_index) tm.assert_index_equal(index, new_index) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_constructor_copy(self, using_infer_string): index = Index(list("abc"), name="name") arr = np.array(index) @@ -338,7 +343,11 @@ def test_constructor_empty_special(self, empty, klass): def test_view_with_args(self, index): index.view("i8") - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) @pytest.mark.parametrize( "index", [ @@ -821,7 +830,11 @@ def test_isin(self, values, index, expected): expected = np.array(expected, dtype=bool) tm.assert_numpy_array_equal(result, expected) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) def test_isin_nan_common_object( self, nulls_fixture, nulls_fixture2, using_infer_string ): diff --git a/pandas/tests/indexes/test_old_base.py b/pandas/tests/indexes/test_old_base.py index a41d50cfaa48d..9993a21d93f12 100644 --- a/pandas/tests/indexes/test_old_base.py +++ b/pandas/tests/indexes/test_old_base.py @@ -9,6 +9,7 @@ from pandas._config import using_string_dtype from pandas._libs.tslibs import Timestamp +from pandas.compat import HAS_PYARROW from pandas.core.dtypes.common import ( is_integer_dtype, @@ -245,7 +246,11 @@ def test_repr_max_seq_item_setting(self, simple_index): repr(idx) assert "..." not in str(idx) - @pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, + ) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") def test_ensure_copied_data(self, index): # Check the "copy" argument of each Index.__new__ is honoured diff --git a/pandas/tests/io/parser/conftest.py b/pandas/tests/io/parser/conftest.py index 02332a70582af..90f77a7024235 100644 --- a/pandas/tests/io/parser/conftest.py +++ b/pandas/tests/io/parser/conftest.py @@ -190,7 +190,16 @@ def _get_all_parser_float_precision_combinations(): parser = parser.values[0] for precision in parser.float_precision_choices: # Re-wrap in pytest.param for pyarrow - mark = pytest.mark.single_cpu if parser.engine == "pyarrow" else () + mark = ( + [ + pytest.mark.single_cpu, + pytest.mark.skipif( + not HAS_PYARROW, reason="pyarrow is not installed" + ), + ] + if parser.engine == "pyarrow" + else () + ) param = pytest.param((parser(), precision), marks=mark) params.append(param) ids.append(f"{parser_id}-{precision}") diff --git a/pandas/tests/reductions/test_reductions.py b/pandas/tests/reductions/test_reductions.py index 66799732be064..26fecef6ed0e6 100644 --- a/pandas/tests/reductions/test_reductions.py +++ b/pandas/tests/reductions/test_reductions.py @@ -9,6 +9,8 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW + import pandas as pd from pandas import ( Categorical, @@ -1204,6 +1206,9 @@ def test_idxminmax_object_dtype(self, using_infer_string): with pytest.raises(TypeError, match=msg): ser3.idxmin(skipna=False) + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_idxminmax_object_frame(self): # GH#4279 df = DataFrame([["zimm", 2.5], ["biff", 1.0], ["bid", 12.0]]) diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 742091d761d62..07679ec75f589 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -10,6 +10,7 @@ from pandas._config import using_string_dtype +from pandas.compat import HAS_PYARROW from pandas.compat.numpy import np_version_gte1p24 from pandas.errors import IndexingError @@ -822,6 +823,9 @@ def test_mask_key(self, obj, key, expected, raises, val, indexer_sli): else: indexer_sli(obj)[mask] = val + @pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + ) def test_series_where(self, obj, key, expected, raises, val, is_inplace): mask = np.zeros(obj.shape, dtype=bool) mask[key] = True diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index ed56a5c1ee855..ff84b5c52183b 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -13,7 +13,6 @@ from pandas._libs import lib from pandas._libs.tslibs import IncompatibleFrequency -from pandas.compat import HAS_PYARROW import pandas as pd from pandas import ( @@ -194,9 +193,6 @@ def test_string_addition(self, target_add, input_value, expected_value): expected = Series(expected_value) tm.assert_series_equal(result, expected) - @pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" - ) def test_divmod(self): # GH#25557 a = Series([1, 1, 1, np.nan], index=["a", "b", "c", "d"]) diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index 94934e6c770a1..262ec35b472ad 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -467,9 +467,6 @@ def test_logical_ops_label_based(self, using_infer_string): with pytest.raises(TypeError, match=msg): t & v - @pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" - ) def test_logical_ops_df_compat(self): # GH#1134 s1 = Series([True, False, True], index=list("ABC"), name="x") diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index bbae4797797d9..4d37c6d57f788 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -1,10 +1,6 @@ import numpy as np import pytest -from pandas._config import using_string_dtype - -from pandas.compat import HAS_PYARROW - from pandas import ( DataFrame, DatetimeIndex, @@ -175,9 +171,6 @@ def test_rolling_corr_cov_other_same_size_as_groups(self, f, expected_val): ) tm.assert_frame_equal(result, expected) - @pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" - ) @pytest.mark.parametrize("f", ["corr", "cov"]) def test_rolling_corr_cov_other_diff_size_as_groups(self, f, roll_frame): g = roll_frame.groupby("A") @@ -1091,9 +1084,6 @@ def test_expanding_quantile(self, interpolation, frame): expected.index = expected_index tm.assert_frame_equal(result, expected) - @pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" - ) @pytest.mark.parametrize("f", ["corr", "cov"]) def test_expanding_corr_cov(self, f, frame): g = frame.groupby("A") @@ -1287,9 +1277,6 @@ def test_dont_mutate_obj_after_slicing(self): tm.assert_frame_equal(result, expected_df) -@pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" -) def test_rolling_corr_with_single_integer_in_index(): # GH 44078 df = DataFrame({"a": [(1,), (1,), (1,)], "b": [4, 5, 6]}) @@ -1302,9 +1289,6 @@ def test_rolling_corr_with_single_integer_in_index(): tm.assert_frame_equal(result, expected) -@pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" -) def test_rolling_corr_with_tuples_in_index(): # GH 44078 df = DataFrame( From b8a61a078220630ec4c826ffecc59438cb01443d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 9 Aug 2024 10:05:53 +0200 Subject: [PATCH 8/8] less strict --- pandas/tests/frame/methods/test_value_counts.py | 7 +++++++ pandas/tests/series/indexing/test_setitem.py | 4 +++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_value_counts.py b/pandas/tests/frame/methods/test_value_counts.py index 4136d641ef67f..7670b53f23173 100644 --- a/pandas/tests/frame/methods/test_value_counts.py +++ b/pandas/tests/frame/methods/test_value_counts.py @@ -1,6 +1,10 @@ import numpy as np import pytest +from pandas._config import using_string_dtype + +from pandas.compat import HAS_PYARROW + import pandas as pd import pandas._testing as tm @@ -132,6 +136,9 @@ def test_data_frame_value_counts_dropna_true(nulls_fixture): tm.assert_series_equal(result, expected) +@pytest.mark.xfail( + using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)", strict=False +) def test_data_frame_value_counts_dropna_false(nulls_fixture): # GH 41334 df = pd.DataFrame( diff --git a/pandas/tests/series/indexing/test_setitem.py b/pandas/tests/series/indexing/test_setitem.py index 07679ec75f589..71ba2dab671ef 100644 --- a/pandas/tests/series/indexing/test_setitem.py +++ b/pandas/tests/series/indexing/test_setitem.py @@ -824,7 +824,9 @@ def test_mask_key(self, obj, key, expected, raises, val, indexer_sli): indexer_sli(obj)[mask] = val @pytest.mark.xfail( - using_string_dtype() and not HAS_PYARROW, reason="TODO(infer_string)" + using_string_dtype() and not HAS_PYARROW, + reason="TODO(infer_string)", + strict=False, ) def test_series_where(self, obj, key, expected, raises, val, is_inplace): mask = np.zeros(obj.shape, dtype=bool)