From 466246d68fedf4e5a0475c7ab512180e6fb69c26 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 3 Aug 2023 16:12:17 -0700 Subject: [PATCH] REF: simplify extension reduction tests --- pandas/tests/extension/base/__init__.py | 1 + pandas/tests/extension/base/reduce.py | 83 +++++++++++-------- .../tests/extension/decimal/test_decimal.py | 9 +- pandas/tests/extension/json/test_json.py | 2 +- pandas/tests/extension/test_arrow.py | 77 ++++++++--------- pandas/tests/extension/test_boolean.py | 9 +- pandas/tests/extension/test_categorical.py | 2 +- pandas/tests/extension/test_interval.py | 2 +- pandas/tests/extension/test_masked_numeric.py | 12 +-- pandas/tests/extension/test_numpy.py | 26 +++--- pandas/tests/extension/test_string.py | 2 +- 11 files changed, 121 insertions(+), 104 deletions(-) diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py index 0e9a35b9f07e8..7d76838998540 100644 --- a/pandas/tests/extension/base/__init__.py +++ b/pandas/tests/extension/base/__init__.py @@ -60,6 +60,7 @@ class TestMyDtype(BaseDtypeTests): BaseBooleanReduceTests, BaseNoReduceTests, BaseNumericReduceTests, + BaseReduceTests, ) from pandas.tests.extension.base.reshaping import BaseReshapingTests # noqa: F401 from pandas.tests.extension.base.setitem import BaseSetitemTests # noqa: F401 diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py index 8f3c919cb0957..8e9684210542a 100644 --- a/pandas/tests/extension/base/reduce.py +++ b/pandas/tests/extension/base/reduce.py @@ -14,6 +14,10 @@ class BaseReduceTests(BaseExtensionTests): make sense for numeric/boolean operations. """ + def _supports_reduction(self, obj, op_name: str) -> bool: + # Specify if we expect this reduction to succeed. 
+ return False + def check_reduce(self, s, op_name, skipna): res_op = getattr(s, op_name) exp_op = getattr(s.astype("float64"), op_name) @@ -25,47 +29,42 @@ def check_reduce(self, s, op_name, skipna): expected = exp_op(skipna=skipna) tm.assert_almost_equal(result, expected) - -class BaseNoReduceTests(BaseReduceTests): - """we don't define any reductions""" - - @pytest.mark.parametrize("skipna", [True, False]) - def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): - op_name = all_numeric_reductions - s = pd.Series(data) - - msg = ( - "[Cc]annot perform|Categorical is not ordered for operation|" - "does not support reduction|" - ) - - with pytest.raises(TypeError, match=msg): - getattr(s, op_name)(skipna=skipna) - @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna): op_name = all_boolean_reductions s = pd.Series(data) - msg = ( - "[Cc]annot perform|Categorical is not ordered for operation|" - "does not support reduction|" - ) + if not self._supports_reduction(s, op_name): + msg = ( + "[Cc]annot perform|Categorical is not ordered for operation|" + "does not support reduction|" + ) - with pytest.raises(TypeError, match=msg): - getattr(s, op_name)(skipna=skipna) + with pytest.raises(TypeError, match=msg): + getattr(s, op_name)(skipna=skipna) + else: + self.check_reduce(s, op_name, skipna) -class BaseNumericReduceTests(BaseReduceTests): @pytest.mark.parametrize("skipna", [True, False]) - def test_reduce_series(self, data, all_numeric_reductions, skipna): + def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): op_name = all_numeric_reductions s = pd.Series(data) - # min/max with empty produce numpy warnings - with warnings.catch_warnings(): - warnings.simplefilter("ignore", RuntimeWarning) - self.check_reduce(s, op_name, skipna) + if not self._supports_reduction(s, op_name): + msg = ( + "[Cc]annot perform|Categorical is not ordered for operation|" + "does 
not support reduction|" + ) + + with pytest.raises(TypeError, match=msg): + getattr(s, op_name)(skipna=skipna) + + else: + # min/max with empty produce numpy warnings + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + self.check_reduce(s, op_name, skipna) @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_frame(self, data, all_numeric_reductions, skipna): @@ -74,12 +73,28 @@ def test_reduce_frame(self, data, all_numeric_reductions, skipna): if not is_numeric_dtype(s): pytest.skip("not numeric dtype") + if not self._supports_reduction(s, op_name): + pytest.skip(f"Reduction {op_name} not supported for this dtype") + self.check_reduce_frame(s, op_name, skipna) +# TODO: deprecate BaseNoReduceTests, BaseNumericReduceTests, BaseBooleanReduceTests +class BaseNoReduceTests(BaseReduceTests): + """we don't define any reductions""" + + +class BaseNumericReduceTests(BaseReduceTests): + # For backward compatibility only, this only runs the numeric reductions + def _supports_reduction(self, obj, op_name: str) -> bool: + if op_name in ["any", "all"]: + pytest.skip("These are tested in BaseBooleanReduceTests") + return True + + class BaseBooleanReduceTests(BaseReduceTests): - @pytest.mark.parametrize("skipna", [True, False]) - def test_reduce_series(self, data, all_boolean_reductions, skipna): - op_name = all_boolean_reductions - s = pd.Series(data) - self.check_reduce(s, op_name, skipna) + # For backward compatibility only, this only runs the boolean reductions + def _supports_reduction(self, obj, op_name: str) -> bool: + if op_name not in ["any", "all"]: + pytest.skip("These are tested in BaseNumericReduceTests") + return True diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index b2d47ec7d8f32..ddc77eb30c93d 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -146,6 +146,9 @@ def 
test_fillna_series_method(self, data_missing, fillna_method): class Reduce: + def _supports_reduction(self, obj, op_name: str) -> bool: + return True + def check_reduce(self, s, op_name, skipna): if op_name in ["median", "skew", "kurt", "sem"]: msg = r"decimal does not support the .* operation" @@ -204,11 +207,7 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs): tm.assert_series_equal(result, expected) -class TestNumericReduce(Reduce, base.BaseNumericReduceTests): - pass - - -class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests): +class TestReduce(Reduce, base.BaseReduceTests): pass diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index 8a571d9295e1f..fa3314e36c974 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -175,7 +175,7 @@ def test_fillna_frame(self): unhashable = pytest.mark.xfail(reason="Unhashable") -class TestReduce(base.BaseNoReduceTests): +class TestReduce(base.BaseReduceTests): pass diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index 7c4ea2d4d7b88..c707c95f006ec 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -408,7 +408,10 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques self.check_accumulate(ser, op_name, skipna) -class TestBaseNumericReduce(base.BaseNumericReduceTests): +class TestReduce(base.BaseReduceTests): + def _supports_reduction(self, obj, op_name: str) -> bool: + return True + def check_reduce(self, ser, op_name, skipna): pa_dtype = ser.dtype.pyarrow_dtype if op_name == "count": @@ -429,7 +432,7 @@ def check_reduce(self, ser, op_name, skipna): tm.assert_almost_equal(result, expected) @pytest.mark.parametrize("skipna", [True, False]) - def test_reduce_series(self, data, all_numeric_reductions, skipna, request): + def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, 
request): pa_dtype = data.dtype.pyarrow_dtype opname = all_numeric_reductions @@ -497,7 +500,40 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request): "median", }: request.node.add_marker(xfail_mark) - super().test_reduce_series(data, all_numeric_reductions, skipna) + super().test_reduce_series_numeric(data, all_numeric_reductions, skipna) + + @pytest.mark.parametrize("skipna", [True, False]) + def test_reduce_series_boolean( + self, data, all_boolean_reductions, skipna, na_value, request + ): + pa_dtype = data.dtype.pyarrow_dtype + xfail_mark = pytest.mark.xfail( + raises=TypeError, + reason=( + f"{all_boolean_reductions} is not implemented in " + f"pyarrow={pa.__version__} for {pa_dtype}" + ), + ) + if pa.types.is_string(pa_dtype) or pa.types.is_binary(pa_dtype): + # We *might* want to make this behave like the non-pyarrow cases, + # but have not yet decided. + request.node.add_marker(xfail_mark) + + op_name = all_boolean_reductions + ser = pd.Series(data) + + if pa.types.is_temporal(pa_dtype) and not pa.types.is_duration(pa_dtype): + # xref GH#34479 we support this in our non-pyarrow datetime64 dtypes, + # but it isn't obvious we _should_. For now, we keep the pyarrow + # behavior which does not support this. 
+ + with pytest.raises(TypeError, match="does not support reduction"): + getattr(ser, op_name)(skipna=skipna) + + return + + result = getattr(ser, op_name)(skipna=skipna) + assert result is (op_name == "any") def check_reduce_frame(self, ser, op_name, skipna): arr = ser.array @@ -540,41 +576,6 @@ def test_median_not_approximate(self, typ): assert result == 1.5 -class TestBaseBooleanReduce(base.BaseBooleanReduceTests): - @pytest.mark.parametrize("skipna", [True, False]) - def test_reduce_series( - self, data, all_boolean_reductions, skipna, na_value, request - ): - pa_dtype = data.dtype.pyarrow_dtype - xfail_mark = pytest.mark.xfail( - raises=TypeError, - reason=( - f"{all_boolean_reductions} is not implemented in " - f"pyarrow={pa.__version__} for {pa_dtype}" - ), - ) - if pa.types.is_string(pa_dtype) or pa.types.is_binary(pa_dtype): - # We *might* want to make this behave like the non-pyarrow cases, - # but have not yet decided. - request.node.add_marker(xfail_mark) - - op_name = all_boolean_reductions - ser = pd.Series(data) - - if pa.types.is_temporal(pa_dtype) and not pa.types.is_duration(pa_dtype): - # xref GH#34479 we support this in our non-pyarrow datetime64 dtypes, - # but it isn't obvious we _should_. For now, we keep the pyarrow - # behavior which does not support this. 
- - with pytest.raises(TypeError, match="does not support reduction"): - getattr(ser, op_name)(skipna=skipna) - - return - - result = getattr(ser, op_name)(skipna=skipna) - assert result is (op_name == "any") - - class TestBaseGroupby(base.BaseGroupbyTests): def test_in_numeric_groupby(self, data_for_grouping): dtype = data_for_grouping.dtype diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py index 3d9798169c736..886f44c8cd073 100644 --- a/pandas/tests/extension/test_boolean.py +++ b/pandas/tests/extension/test_boolean.py @@ -220,7 +220,10 @@ def test_groupby_sum_mincount(self, data_for_grouping, min_count): tm.assert_frame_equal(result, expected) -class TestNumericReduce(base.BaseNumericReduceTests): +class TestReduce(base.BaseReduceTests): + def _supports_reduction(self, obj, op_name: str) -> bool: + return True + def check_reduce(self, s, op_name, skipna): if op_name == "count": result = getattr(s, op_name)() @@ -261,10 +264,6 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): tm.assert_extension_array_equal(result, expected) -class TestBooleanReduce(base.BaseBooleanReduceTests): - pass - - class TestPrinting(base.BasePrintingTests): pass diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py index fc4dfe3af3bca..e24d29ea53908 100644 --- a/pandas/tests/extension/test_categorical.py +++ b/pandas/tests/extension/test_categorical.py @@ -152,7 +152,7 @@ class TestMissing(base.BaseMissingTests): pass -class TestReduce(base.BaseNoReduceTests): +class TestReduce(base.BaseReduceTests): pass diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py index 4ef303289ee5c..b4870d2f1fe2f 100644 --- a/pandas/tests/extension/test_interval.py +++ b/pandas/tests/extension/test_interval.py @@ -105,7 +105,7 @@ class TestInterface(BaseInterval, base.BaseInterfaceTests): pass -class TestReduce(base.BaseNoReduceTests): +class 
TestReduce(base.BaseReduceTests): @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): op_name = all_numeric_reductions diff --git a/pandas/tests/extension/test_masked_numeric.py b/pandas/tests/extension/test_masked_numeric.py index fc22ccabd7104..4758529a0fbfa 100644 --- a/pandas/tests/extension/test_masked_numeric.py +++ b/pandas/tests/extension/test_masked_numeric.py @@ -225,7 +225,12 @@ class TestGroupby(base.BaseGroupbyTests): pass -class TestNumericReduce(base.BaseNumericReduceTests): +class TestReduce(base.BaseReduceTests): + def _supports_reduction(self, obj, op_name: str) -> bool: + if op_name in ["any", "all"]: + pytest.skip(reason="Tested in tests/reductions/test_reductions.py") + return True + def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool): # overwrite to ensure pd.NA is tested instead of np.nan # https://github.com/pandas-dev/pandas/issues/30958 @@ -284,11 +289,6 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): tm.assert_extension_array_equal(result2, expected) -@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py") -class TestBooleanReduce(base.BaseBooleanReduceTests): - pass - - class TestAccumulation(base.BaseAccumulateTests): def _supports_accumulation(self, ser: pd.Series, op_name: str) -> bool: return True diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index f4ff423ad485b..0d49652f7b96e 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -303,26 +303,28 @@ class TestPrinting(BaseNumPyTests, base.BasePrintingTests): pass -class TestNumericReduce(BaseNumPyTests, base.BaseNumericReduceTests): +class TestReduce(BaseNumPyTests, base.BaseReduceTests): + def _supports_reduction(self, obj, op_name: str) -> bool: + if tm.get_dtype(obj).kind == "O": + return op_name in ["sum", "min", "max", "any", "all"] + return True + def 
check_reduce(self, s, op_name, skipna): - result = getattr(s, op_name)(skipna=skipna) + res_op = getattr(s, op_name) # avoid coercing int -> float. Just cast to the actual numpy type. - expected = getattr(s.astype(s.dtype._dtype), op_name)(skipna=skipna) + exp_op = getattr(s.astype(s.dtype._dtype), op_name) + if op_name == "count": + result = res_op() + expected = exp_op() + else: + result = res_op(skipna=skipna) + expected = exp_op(skipna=skipna) tm.assert_almost_equal(result, expected) @pytest.mark.skip("tests not written yet") def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool): pass - @pytest.mark.parametrize("skipna", [True, False]) - def test_reduce_series(self, data, all_boolean_reductions, skipna): - super().test_reduce_series(data, all_boolean_reductions, skipna) - - -@skip_nested -class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests): - pass - class TestMissing(BaseNumPyTests, base.BaseMissingTests): @skip_nested diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py index d42d79da17f4e..7256ea5837bbf 100644 --- a/pandas/tests/extension/test_string.py +++ b/pandas/tests/extension/test_string.py @@ -168,7 +168,7 @@ def test_fillna_no_op_returns_copy(self, data): tm.assert_extension_array_equal(result, data) -class TestNoReduce(base.BaseNoReduceTests): +class TestReduce(base.BaseReduceTests): @pytest.mark.parametrize("skipna", [True, False]) def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna): op_name = all_numeric_reductions