From 1e56b80a99b02f1d91dcc03e98281f9ed7677de0 Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Mon, 15 Jul 2024 07:12:26 -0400 Subject: [PATCH] unskip more EA reduction tests --- pandas/core/arrays/masked.py | 2 +- pandas/tests/extension/base/reduce.py | 4 +- .../tests/extension/decimal/test_decimal.py | 2 + pandas/tests/extension/test_arrow.py | 40 +++++-------------- pandas/tests/extension/test_masked.py | 6 +-- 5 files changed, 18 insertions(+), 36 deletions(-) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 93471788e72ab..92ed690e527c7 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -1198,7 +1198,7 @@ def _wrap_na_result(self, *, name, axis, mask_size): mask = np.ones(mask_size, dtype=bool) float_dtyp = "float32" if self.dtype == "Float32" else "float64" - if name in ["mean", "median", "var", "std", "skew", "kurt"]: + if name in ["mean", "median", "var", "std", "skew", "kurt", "sem"]: np_dtype = float_dtyp elif name in ["min", "max"] or self.dtype.itemsize == 8: np_dtype = self.dtype.numpy_dtype.name diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py index 3e357f99cfb03..4b3431d938f96 100644 --- a/pandas/tests/extension/base/reduce.py +++ b/pandas/tests/extension/base/reduce.py @@ -56,7 +56,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): arr = ser.array df = pd.DataFrame({"a": arr}) - kwargs = {"ddof": 1} if op_name in ["var", "std"] else {} + kwargs = {"ddof": 1} if op_name in ["var", "std", "sem"] else {} cmp_dtype = self._get_expected_reduction_dtype(arr, op_name, skipna) @@ -119,7 +119,7 @@ def test_reduce_frame(self, data, all_numeric_reductions, skipna): op_name = all_numeric_reductions ser = pd.Series(data) - if op_name in ["count", "kurt", "sem"]: + if op_name == "count": pytest.skip(f"{op_name} not an array method") if not self._supports_reduction(ser, op_name): diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 6f18761f77138..070feb1fec4b9 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -72,6 +72,8 @@ def _get_expected_exception( return None def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: + if op_name in ["kurt", "sem"]: + return False return True def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 6d14f04383a65..ea9c5096638d5 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -467,17 +467,14 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques self.check_accumulate(ser, op_name, skipna) def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: + if op_name in ["kurt", "skew"]: + return False + dtype = ser.dtype # error: Item "dtype[Any]" of "dtype[Any] | ExtensionDtype" has # no attribute "pyarrow_dtype" pa_dtype = dtype.pyarrow_dtype # type: ignore[union-attr] - if pa.types.is_temporal(pa_dtype) and op_name in [ - "sum", - "var", - "skew", - "kurt", - "prod", - ]: + if pa.types.is_temporal(pa_dtype) and op_name in ["sum", "var", "prod"]: if pa.types.is_duration(pa_dtype) and op_name in ["sum"]: # summing timedeltas is one case that *is* well-defined pass @@ -493,8 +490,6 @@ def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool: "std", "sem", "var", - "skew", - "kurt", ]: return False @@ -541,23 +536,6 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): expected = getattr(alt, op_name)(skipna=skipna) tm.assert_almost_equal(result, expected) - @pytest.mark.parametrize("skipna", [True, False]) - def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, request): - dtype = data.dtype - pa_dtype = dtype.pyarrow_dtype - - xfail_mark = pytest.mark.xfail( - raises=TypeError, - reason=( - f"{all_numeric_reductions} is not implemented in " - f"pyarrow={pa.__version__} for {pa_dtype}" - ), - ) - if all_numeric_reductions in {"skew", "kurt"} and dtype._is_numeric: - request.applymarker(xfail_mark) - - super().test_reduce_series_numeric(data, all_numeric_reductions, skipna) - @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_series_boolean( self, data, all_boolean_reductions, skipna, na_value, request @@ -596,11 +574,11 @@ def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): else: cmp_dtype = arr.dtype elif arr.dtype.name == "decimal128(7, 3)[pyarrow]": - if op_name not in ["median", "var", "std"]: + if op_name not in ["median", "var", "std", "sem"]: cmp_dtype = arr.dtype else: cmp_dtype = "float64[pyarrow]" - elif op_name in ["median", "var", "std", "mean", "skew"]: + elif op_name in ["median", "var", "std", "mean", "skew", "sem"]: cmp_dtype = "float64[pyarrow]" elif op_name in ["sum", "prod"] and pa.types.is_boolean(pa_type): cmp_dtype = "uint64[pyarrow]" @@ -619,7 +597,11 @@ def test_reduce_frame(self, data, all_numeric_reductions, skipna, request): if data.dtype._is_numeric: mark = pytest.mark.xfail(reason="skew not implemented") request.applymarker(mark) - elif op_name == "std" and pa.types.is_date64(data._pa_array.type) and skipna: + elif ( + op_name in ["std", "sem"] + and pa.types.is_date64(data._pa_array.type) + and skipna + ): # overflow mark = pytest.mark.xfail(reason="Cannot cast") request.applymarker(mark) diff --git a/pandas/tests/extension/test_masked.py b/pandas/tests/extension/test_masked.py index 69ce42203d510..3b9079d06e231 100644 --- a/pandas/tests/extension/test_masked.py +++ b/pandas/tests/extension/test_masked.py @@ -301,7 +301,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): if is_float_dtype(arr.dtype): cmp_dtype = arr.dtype.name - elif op_name in ["mean", "median", "var", "std", "skew"]: + elif op_name in ["mean", "median", "var", "std", "skew", "kurt", "sem"]: cmp_dtype = "Float64" elif op_name in ["max", "min"]: cmp_dtype = arr.dtype.name @@ -323,9 +323,7 @@ def _get_expected_reduction_dtype(self, arr, op_name: str, skipna: bool): else "UInt64" ) elif arr.dtype.kind == "b": - if op_name in ["mean", "median", "var", "std", "skew"]: - cmp_dtype = "Float64" - elif op_name in ["min", "max"]: + if op_name in ["min", "max"]: cmp_dtype = "boolean" elif op_name in ["sum", "prod"]: cmp_dtype = (