From e0305e9e962391e70b9314a523a20d097340bf05 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 3 Aug 2023 15:08:03 -0700 Subject: [PATCH] REF: de-duplicate check_reduce_frame --- pandas/tests/extension/base/reduce.py | 46 ++++++++++++++++++- .../tests/extension/decimal/test_decimal.py | 30 ++++-------- pandas/tests/extension/test_arrow.py | 26 ++++------- pandas/tests/extension/test_boolean.py | 17 +------ pandas/tests/extension/test_masked_numeric.py | 26 ++--------- pandas/tests/extension/test_numpy.py | 3 +- 6 files changed, 70 insertions(+), 78 deletions(-) diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py index 8f3c919cb0957..b7edfb860549c 100644 --- a/pandas/tests/extension/base/reduce.py +++ b/pandas/tests/extension/base/reduce.py @@ -1,3 +1,4 @@ +from typing import final import warnings import pytest @@ -15,6 +16,9 @@ class BaseReduceTests(BaseExtensionTests): """ def check_reduce(self, s, op_name, skipna): + # We perform the same operation on the np.float64 data and check + # that the results match. Override if you need to cast to something + # other than float64. res_op = getattr(s, op_name) exp_op = getattr(s.astype("float64"), op_name) if op_name == "count": @@ -25,6 +29,43 @@ def check_reduce(self, s, op_name, skipna): expected = exp_op(skipna=skipna) tm.assert_almost_equal(result, expected) + def _get_expected_reduction_dtype(self, arr, op_name: str): + # Find the expected dtype when the given reduction is done on a DataFrame + # column with this array. The default assumes float64-like behavior, + # i.e. retains the dtype. + return arr.dtype + + # We anticipate that authors should not need to override check_reduce_frame, + # but should be able to do any necessary overriding in + # _get_expected_reduction_dtype. If you have a use case where this + # does not hold, please let us know at github.com/pandas-dev/pandas/issues. + @final + def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): + # Check that the 2D reduction done in a DataFrame reduction "looks like" + # a wrapped version of the 1D reduction done by Series. + arr = ser.array + df = pd.DataFrame({"a": arr}) + + kwargs = {"ddof": 1} if op_name in ["var", "std"] else {} + + cmp_dtype = self._get_expected_reduction_dtype(arr, op_name) + + # The DataFrame method just calls arr._reduce with keepdims=True, + # so this first check is perfunctory. + result1 = arr._reduce(op_name, skipna=skipna, keepdims=True, **kwargs) + result2 = getattr(df, op_name)(skipna=skipna, **kwargs).array + tm.assert_extension_array_equal(result1, result2) + + # Check that the 2D reduction looks like a wrapped version of the + # 1D reduction + if not skipna and ser.isna().any(): + expected = pd.array([pd.NA], dtype=cmp_dtype) + else: + exp_value = getattr(ser.dropna(), op_name)() + expected = pd.array([exp_value], dtype=cmp_dtype) + + tm.assert_extension_array_equal(result1, expected) + class BaseNoReduceTests(BaseReduceTests): """we don't define any reductions""" @@ -71,9 +112,12 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna): def test_reduce_frame(self, data, all_numeric_reductions, skipna): op_name = all_numeric_reductions s = pd.Series(data) - if not is_numeric_dtype(s): + if not is_numeric_dtype(s.dtype): pytest.skip("not numeric dtype") + if op_name in ["count", "kurt", "sem"]: + pytest.skip(f"{op_name} not an array method") + self.check_reduce_frame(s, op_name, skipna) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index b2d47ec7d8f32..944ed0dbff66e 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -160,27 +160,6 @@ def check_reduce(self, s, op_name, skipna): expected = getattr(np.asarray(s), op_name)() tm.assert_almost_equal(result, expected) - def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): - arr = ser.array - df = pd.DataFrame({"a": arr}) - - if op_name in ["count", "kurt", "sem", "skew", "median"]: - assert not hasattr(arr, op_name) - pytest.skip(f"{op_name} not an array method") - - result1 = arr._reduce(op_name, skipna=skipna, keepdims=True) - result2 = getattr(df, op_name)(skipna=skipna).array - - tm.assert_extension_array_equal(result1, result2) - - if not skipna and ser.isna().any(): - expected = DecimalArray([pd.NA]) - else: - exp_value = getattr(ser.dropna(), op_name)() - expected = DecimalArray([exp_value]) - - tm.assert_extension_array_equal(result1, expected) - def test_reduction_without_keepdims(self): # GH52788 # test _reduce without keepdims @@ -205,7 +184,14 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): class TestNumericReduce(Reduce, base.BaseNumericReduceTests): - pass + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_frame(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + if op_name in ["skew", "median"]: + assert not hasattr(data, op_name) + pytest.skip(f"{op_name} not an array method") + + return super().test_reduce_frame(data, all_numeric_reductions, skipna) class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests): diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 7c4ea2d4d7b88..655ca9cc39c58 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -499,15 +499,7 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request): request.node.add_marker(xfail_mark) super().test_reduce_series(data, all_numeric_reductions, skipna) - def check_reduce_frame(self, ser, op_name, skipna): - arr = ser.array - - if op_name in ["count", "kurt", "sem", "skew"]: - assert not hasattr(arr, op_name) - return - - kwargs = {"ddof": 1} if op_name in ["var", "std"] else {} - + def _get_expected_reduction_dtype(self, arr, op_name: str): if op_name in ["max", "min"]: cmp_dtype = arr.dtype elif arr.dtype.name == "decimal128(7, 3)[pyarrow]": @@ -523,15 +515,15 @@ def check_reduce_frame(self, ser, op_name, skipna): "u": "uint64[pyarrow]", "f": "float64[pyarrow]", }[arr.dtype.kind] - result = arr._reduce(op_name, skipna=skipna, keepdims=True, **kwargs) + return cmp_dtype - if not skipna and ser.isna().any(): - expected = pd.array([pd.NA], dtype=cmp_dtype) - else: - exp_value = getattr(ser.dropna().astype(cmp_dtype), op_name)(**kwargs) - expected = pd.array([exp_value], dtype=cmp_dtype) - - tm.assert_extension_array_equal(result, expected) + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_frame(self, data, all_numeric_reductions, skipna): + op_name = all_numeric_reductions + if op_name == "skew": + assert not hasattr(data, op_name) + return + return super().test_reduce_frame(data, all_numeric_reductions, skipna) @pytest.mark.parametrize("typ", ["int64", "uint64", "float64"]) def test_median_not_approximate(self, typ): diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index 3d9798169c736..e5f6da5371742 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -235,13 +235,7 @@ def check_reduce(self, s, op_name, skipna): expected = bool(expected) tm.assert_almost_equal(result, expected) - def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): - arr = ser.array - - if op_name in ["count", "kurt", "sem"]: - assert not hasattr(arr, op_name) - pytest.skip(f"{op_name} not an array method") - + def _get_expected_reduction_dtype(self, arr, op_name: str): if op_name in ["mean", "median", "var", "std", "skew"]: cmp_dtype = "Float64" elif op_name in ["min", "max"]: @@ -251,14 +245,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): cmp_dtype = "Int32" if is_windows_or_32bit else "Int64" else: raise TypeError("not supposed to reach this") - - result = arr._reduce(op_name, skipna=skipna, keepdims=True) - if not skipna and ser.isna().any(): - expected = pd.array([pd.NA], dtype=cmp_dtype) - else: - exp_value = getattr(ser.dropna().astype(cmp_dtype), op_name)() - expected = pd.array([exp_value], dtype=cmp_dtype) - tm.assert_extension_array_equal(result, expected) + return cmp_dtype class TestBooleanReduce(base.BaseBooleanReduceTests): diff --git a/pandas/tests/extension/test_masked_numeric.py b/pandas/tests/extension/test_masked_numeric.py index fc22ccabd7104..b171797dd6359 100644 --- a/pandas/tests/extension/test_masked_numeric.py +++ b/pandas/tests/extension/test_masked_numeric.py @@ -39,6 +39,8 @@ ) from pandas.tests.extension import base +is_windows_or_32bit = is_platform_windows() or not IS64 + pytestmark = [ pytest.mark.filterwarnings( "ignore:invalid value encountered in divide:RuntimeWarning" @@ -246,16 +248,7 @@ def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): expected = pd.NA tm.assert_almost_equal(result, expected) - def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): - if op_name in ["count", "kurt", "sem"]: - assert not hasattr(ser.array, op_name) - pytest.skip(f"{op_name} not an array method") - - arr = ser.array - df = pd.DataFrame({"a": arr}) - - is_windows_or_32bit = is_platform_windows() or not IS64 - + def _get_expected_reduction_dtype(self, arr, op_name: str): if tm.is_float_dtype(arr.dtype): cmp_dtype = arr.dtype.name elif op_name in ["mean", "median", "var", "std", "skew"]: @@ -270,18 +263,7 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): cmp_dtype = "UInt32" if is_windows_or_32bit else "UInt64" else: raise TypeError("not supposed to reach this") - - if not skipna and ser.isna().any(): - expected = pd.array([pd.NA], dtype=cmp_dtype) - else: - exp_value = getattr(ser.dropna().astype(cmp_dtype), op_name)() - expected = pd.array([exp_value], dtype=cmp_dtype) - - result1 = arr._reduce(op_name, skipna=skipna, keepdims=True) - result2 = getattr(df, op_name)(skipna=skipna).array - - tm.assert_extension_array_equal(result1, result2) - tm.assert_extension_array_equal(result2, expected) + return cmp_dtype @pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py") diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index f4ff423ad485b..db191954c8d59 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -311,7 +311,8 @@ def check_reduce(self, s, op_name, skipna): tm.assert_almost_equal(result, expected) @pytest.mark.skip("tests not written yet") - def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_frame(self, data, all_numeric_reductions, skipna): pass @pytest.mark.parametrize("skipna", [True, False])