From d81b2b43f7726a9c3253286b28e5947165587b99 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 10 Mar 2025 10:57:47 -0700 Subject: [PATCH 1/5] CI/TST: Address TestArrowArray::test_reduce_series_numeric supporting skew --- pandas/tests/extension/test_arrow.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index fbd3868f62899..2f78561b2ad0e 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -42,6 +42,7 @@ pa_version_under11p0, pa_version_under13p0, pa_version_under14p0, + pa_version_under20p0, ) from pandas.core.dtypes.dtypes import ( @@ -453,7 +454,7 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques self.check_accumulate(ser, op_name, skipna) def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: - if op_name in ["kurt", "skew"]: + if op_name == "kurt" or (pa_version_under20p0 and op_name == "skew"): return False dtype = ser.dtype From 80d4cfd8eb9c699369e3464532b18210b2c80796 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 10 Mar 2025 12:31:54 -0700 Subject: [PATCH 2/5] Add it to compat/__init__.py --- pandas/compat/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index 138456f877c5f..9f3bfdc205498 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -35,6 +35,7 @@ pa_version_under17p0, pa_version_under18p0, pa_version_under19p0, + pa_version_under20p0, ) if TYPE_CHECKING: @@ -168,4 +169,5 @@ def is_ci_environment() -> bool: "pa_version_under17p0", "pa_version_under18p0", "pa_version_under19p0", + "pa_version_under20p0", ] From f030b258d9e4a5430a89f63775b06aa0e30ec028 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 10 Mar 2025 15:10:30 -0700 Subject: [PATCH 3/5] Update _supports_reduction with skew --- pandas/tests/extension/test_arrow.py | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 2f78561b2ad0e..240429f0752ca 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -461,24 +461,17 @@ def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: # error: Item "dtype[Any]" of "dtype[Any] | ExtensionDtype" has # no attribute "pyarrow_dtype" pa_dtype = dtype.pyarrow_dtype # type: ignore[union-attr] - if pa.types.is_temporal(pa_dtype) and op_name in ["sum", "var", "prod"]: + if pa.types.is_temporal(pa_dtype) and op_name in ["sum", "var", "prod", "skew"]: if pa.types.is_duration(pa_dtype) and op_name in ["sum"]: # summing timedeltas is one case that *is* well-defined pass else: return False - elif pa.types.is_binary(pa_dtype) and op_name == "sum": + elif pa.types.is_binary(pa_dtype) and op_name in ["sum", "skew"]: return False elif ( pa.types.is_string(pa_dtype) or pa.types.is_binary(pa_dtype) - ) and op_name in [ - "mean", - "median", - "prod", - "std", - "sem", - "var", - ]: + ) and op_name in ["mean", "median", "prod", "std", "sem", "var", "skew"]: return False if ( @@ -583,7 +576,7 @@ def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_frame(self, data, all_numeric_reductions, skipna, request): op_name = all_numeric_reductions - if op_name == "skew": + if op_name == "skew" and pa_version_under20p0: if data.dtype._is_numeric: mark = pytest.mark.xfail(reason="skew not implemented") request.applymarker(mark) From dda760444fae8bf84b7dde0f9f279b744807d71e Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 10 Mar 2025 16:52:14 -0700 Subject: [PATCH 4/5] Add xfail for skew --- pandas/tests/extension/test_arrow.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 240429f0752ca..daf473bffc994 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -555,7 +555,7 @@ def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): else: cmp_dtype = arr.dtype elif arr.dtype.name == "decimal128(7, 3)[pyarrow]": - if op_name not in ["median", "var", "std", "sem"]: + if op_name not in ["median", "var", "std", "sem", "skew"]: cmp_dtype = arr.dtype else: cmp_dtype = "float64[pyarrow]" @@ -573,6 +573,24 @@ def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): }[arr.dtype.kind] return cmp_dtype + @pytest.mark.filterwarnings("ignore::RuntimeWarning") + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, request): + if ( + skipna + and all_numeric_reductions == "skew" + and ( + pa.types.is_integer(data.dtype.pyarrow_dtype) + or pa.types.is_floating(data.dtype.pyarrow_dtype) + ) + ): + request.applymarker( + pytest.mark.xfail( + reason="https://github.com/apache/arrow/issues/45733", + ) + ) + return super().test_reduce_series_numeric(data, all_numeric_reductions, skipna) + @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_frame(self, data, all_numeric_reductions, skipna, request): op_name = all_numeric_reductions From 5e41b4714f6f659655f9a1571ae453938ff17fd1 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Mon, 10 Mar 2025 18:05:46 -0700 Subject: [PATCH 5/5] Add version specifier --- pandas/tests/extension/test_arrow.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index daf473bffc994..7b7c2a632aba2 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -577,7 +577,8 @@ def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, request): if ( - skipna + not pa_version_under20p0 + and skipna and all_numeric_reductions == "skew" and ( pa.types.is_integer(data.dtype.pyarrow_dtype)