From e89f94a1c98a7ea32e814600a8ed942d2a6dc64c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 10 Mar 2025 21:32:47 -0700 Subject: [PATCH 1/6] Backport PR #61098: CI/TST: Address TestArrowArray::test_reduce_series_numeric supporting skew --- pandas/compat/__init__.py | 2 ++ pandas/tests/extension/test_arrow.py | 24 +++++++++++++++++++++--- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 9b6b1ab3b8909..ff99d6b759d66 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -35,6 +35,7 @@ pa_version_under17p0, pa_version_under18p0, pa_version_under19p0, + pa_version_under20p0, ) if TYPE_CHECKING: @@ -195,6 +196,7 @@ def get_bz2_file() -> type[pandas.compat.compressors.BZ2File]: "pa_version_under17p0", "pa_version_under18p0", "pa_version_under19p0", + "pa_version_under20p0", "HAS_PYARROW", "IS64", "ISMUSL", diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index a63cde8022e24..acdf274f00b1d 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -40,6 +40,7 @@ pa_version_under11p0, pa_version_under13p0, pa_version_under14p0, + pa_version_under20p0, ) from pandas.core.dtypes.dtypes import ( @@ -448,6 +449,9 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques self.check_accumulate(ser, op_name, skipna) def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: + if op_name == "kurt" or (pa_version_under20p0 and op_name == "skew"): + return False + dtype = ser.dtype # error: Item "dtype[Any]" of "dtype[Any] | ExtensionDtype" has # no attribute "pyarrow_dtype" @@ -464,7 +468,7 @@ def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: pass else: return False - elif pa.types.is_binary(pa_dtype) and op_name == "sum": + elif pa.types.is_binary(pa_dtype) and op_name in ["sum", "skew"]: return False elif ( pa.types.is_string(pa_dtype) or pa.types.is_binary(pa_dtype) @@ -537,6 +541,20 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, reque "median", }: request.applymarker(xfail_mark) + elif ( + not pa_version_under20p0 + and skipna + and all_numeric_reductions == "skew" + and ( + pa.types.is_integer(data.dtype.pyarrow_dtype) + or pa.types.is_floating(data.dtype.pyarrow_dtype) + ) + ): + request.applymarker( + pytest.mark.xfail( + reason="https://github.com/apache/arrow/issues/45733", + ) + ) super().test_reduce_series_numeric(data, all_numeric_reductions, skipna) @pytest.mark.parametrize("skipna", [True, False]) @@ -563,7 +581,7 @@ def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): if op_name in ["max", "min"]: cmp_dtype = arr.dtype elif arr.dtype.name == "decimal128(7, 3)[pyarrow]": - if op_name not in ["median", "var", "std"]: + if op_name not in ["median", "var", "std", "skew"]: cmp_dtype = arr.dtype else: cmp_dtype = "float64[pyarrow]" @@ -582,7 +600,7 @@ def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_frame(self, data, all_numeric_reductions, skipna, request): op_name = all_numeric_reductions - if op_name == "skew": + if op_name == "skew" and pa_version_under20p0: if data.dtype._is_numeric: mark = pytest.mark.xfail(reason="skew not implemented") request.applymarker(mark) From 60d83c1f33e5abc095da140102fe75c15d3d2dbb Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 11 Mar 2025 15:30:40 -0700 Subject: [PATCH 2/6] remove skew from check --- pandas/tests/extension/test_arrow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index acdf274f00b1d..2e63051b3bddb 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -529,7 +529,7 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, reque f"pyarrow={pa.__version__} for {pa_dtype}" ), ) - if all_numeric_reductions in {"skew", "kurt"} and ( + if all_numeric_reductions == "kurt" and ( dtype._is_numeric or dtype.kind == "b" ): request.applymarker(xfail_mark) From bf585369c7317f1a3f985d230d3f6baa68cdd613 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 11 Mar 2025 17:00:22 -0700 Subject: [PATCH 3/6] Remove kurt too --- pandas/tests/extension/test_arrow.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 2e63051b3bddb..15003b0b3b3a0 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -529,12 +529,7 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, reque f"pyarrow={pa.__version__} for {pa_dtype}" ), ) - if all_numeric_reductions == "kurt" and ( - dtype._is_numeric or dtype.kind == "b" - ): - request.applymarker(xfail_mark) - - elif pa.types.is_boolean(pa_dtype) and all_numeric_reductions in { + if pa.types.is_boolean(pa_dtype) and all_numeric_reductions in { "sem", "std", "var", From 85df4aeb3e2eb09fc1ea0112b2e6f564c36de6e0 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 11 Mar 2025 17:31:54 -0700 Subject: [PATCH 4/6] Add skew to bool skip list --- pandas/tests/extension/test_arrow.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 15003b0b3b3a0..237ccfb53e2ba 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -534,6 +534,7 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, reque "std", "var", "median", + "skew", }: request.applymarker(xfail_mark) elif ( From 5ae8f7f846fe733d846e455c301613471b03559f Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 12 Mar 2025 10:10:14 -0700 Subject: [PATCH 5/6] Add boolean --- pandas/tests/extension/test_arrow.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 237ccfb53e2ba..ece36af746306 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -534,7 +534,6 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, reque "std", "var", "median", - "skew", }: request.applymarker(xfail_mark) elif ( @@ -542,8 +541,9 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, reque and skipna and all_numeric_reductions == "skew" and ( - pa.types.is_integer(data.dtype.pyarrow_dtype) - or pa.types.is_floating(data.dtype.pyarrow_dtype) + pa.types.is_integer(pa_dtype) + or pa.types.is_floating(pa_dtype) + or pa.types.is_boolean(pa_dtype) ) ): request.applymarker( From c6154d5fa84129c42b0771cd0e98a0b71e0a2781 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 12 Mar 2025 16:58:04 -0700 Subject: [PATCH 6/6] bool fails regardless of skipna --- pandas/tests/extension/test_arrow.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index ece36af746306..17fe36c4b4469 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -538,12 +538,15 @@ def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, reque request.applymarker(xfail_mark) elif ( not pa_version_under20p0 - and skipna and all_numeric_reductions == "skew" and ( - pa.types.is_integer(pa_dtype) - or pa.types.is_floating(pa_dtype) - or pa.types.is_boolean(pa_dtype) + pa.types.is_boolean(pa_dtype) + or ( + skipna + and ( + pa.types.is_integer(pa_dtype) or pa.types.is_floating(pa_dtype) + ) + ) ) ): request.applymarker(