Consolidate the extension-array reduction tests into BaseReduceTests.

BaseReduceTests gains a _supports_reduction(obj, op_name) hook that returns
False by default.  test_reduce_series_boolean and test_reduce_series_numeric
now live on the base class: when the hook returns False they expect a
TypeError, otherwise they call check_reduce.  test_reduce_frame skips
reductions the hook reports as unsupported.  BaseNoReduceTests,
BaseNumericReduceTests and BaseBooleanReduceTests are kept as thin subclasses
for backward compatibility, and the decimal, json, arrow, boolean,
categorical, interval, masked-numeric, numpy and string suites each switch to
a single TestReduce class.

diff --git a/pandas/tests/extension/base/__init__.py b/pandas/tests/extension/base/__init__.py
index 0e9a35b9f07e8..7d76838998540 100644
--- a/pandas/tests/extension/base/__init__.py
+++ b/pandas/tests/extension/base/__init__.py
@@ -60,6 +60,7 @@ class TestMyDtype(BaseDtypeTests):
     BaseBooleanReduceTests,
     BaseNoReduceTests,
     BaseNumericReduceTests,
+    BaseReduceTests,
 )
 from pandas.tests.extension.base.reshaping import BaseReshapingTests  # noqa: F401
 from pandas.tests.extension.base.setitem import BaseSetitemTests  # noqa: F401
diff --git a/pandas/tests/extension/base/reduce.py b/pandas/tests/extension/base/reduce.py
index b7edfb860549c..00919b16a2600 100644
--- a/pandas/tests/extension/base/reduce.py
+++ b/pandas/tests/extension/base/reduce.py
@@ -15,6 +15,10 @@ class BaseReduceTests(BaseExtensionTests):
     make sense for numeric/boolean operations.
     """
 
+    def _supports_reduction(self, obj, op_name: str) -> bool:
+        # Specify if we expect this reduction to succeed.
+        return False
+
     def check_reduce(self, s, op_name, skipna):
         # We perform the same operation on the np.float64 data and check
         # that the results match. Override if you need to cast to something
@@ -66,47 +70,44 @@ def check_reduce_frame(self, ser: pd.Series, op_name: str, skipna: bool):
 
         tm.assert_extension_array_equal(result1, expected)
 
-
-class BaseNoReduceTests(BaseReduceTests):
-    """we don't define any reductions"""
-
-    @pytest.mark.parametrize("skipna", [True, False])
-    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
-        op_name = all_numeric_reductions
-        s = pd.Series(data)
-
-        msg = (
-            "[Cc]annot perform|Categorical is not ordered for operation|"
-            "does not support reduction|"
-        )
-
-        with pytest.raises(TypeError, match=msg):
-            getattr(s, op_name)(skipna=skipna)
-
     @pytest.mark.parametrize("skipna", [True, False])
     def test_reduce_series_boolean(self, data, all_boolean_reductions, skipna):
         op_name = all_boolean_reductions
         s = pd.Series(data)
 
-        msg = (
-            "[Cc]annot perform|Categorical is not ordered for operation|"
-            "does not support reduction|"
-        )
+        if not self._supports_reduction(s, op_name):
+            # NOTE: the trailing "|" is an empty alternative, so any message matches
+            msg = (
+                "[Cc]annot perform|Categorical is not ordered for operation|"
+                "does not support reduction|"
+            )
 
-        with pytest.raises(TypeError, match=msg):
-            getattr(s, op_name)(skipna=skipna)
+            with pytest.raises(TypeError, match=msg):
+                getattr(s, op_name)(skipna=skipna)
 
+        else:
+            self.check_reduce(s, op_name, skipna)
 
-class BaseNumericReduceTests(BaseReduceTests):
     @pytest.mark.parametrize("skipna", [True, False])
-    def test_reduce_series(self, data, all_numeric_reductions, skipna):
+    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
         op_name = all_numeric_reductions
         s = pd.Series(data)
 
-        # min/max with empty produce numpy warnings
-        with warnings.catch_warnings():
-            warnings.simplefilter("ignore", RuntimeWarning)
-            self.check_reduce(s, op_name, skipna)
+        if not self._supports_reduction(s, op_name):
+            # NOTE: the trailing "|" is an empty alternative, so any message matches
+            msg = (
+                "[Cc]annot perform|Categorical is not ordered for operation|"
+                "does not support reduction|"
+            )
+
+            with pytest.raises(TypeError, match=msg):
+                getattr(s, op_name)(skipna=skipna)
+
+        else:
+            # min/max with empty produce numpy warnings
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore", RuntimeWarning)
+                self.check_reduce(s, op_name, skipna)
 
     @pytest.mark.parametrize("skipna", [True, False])
     def test_reduce_frame(self, data, all_numeric_reductions, skipna):
@@ -118,12 +119,28 @@ def test_reduce_frame(self, data, all_numeric_reductions, skipna):
         if op_name in ["count", "kurt", "sem"]:
             pytest.skip(f"{op_name} not an array method")
 
+        if not self._supports_reduction(s, op_name):
+            pytest.skip(f"Reduction {op_name} not supported for this dtype")
+
         self.check_reduce_frame(s, op_name, skipna)
 
 
+# TODO: deprecate BaseNoReduceTests, BaseNumericReduceTests, BaseBooleanReduceTests
+class BaseNoReduceTests(BaseReduceTests):
+    """we don't define any reductions"""
+
+
+class BaseNumericReduceTests(BaseReduceTests):
+    # For backward compatibility only, this only runs the numeric reductions
+    def _supports_reduction(self, obj, op_name: str) -> bool:
+        if op_name in ["any", "all"]:
+            pytest.skip("These are tested in BaseBooleanReduceTests")
+        return True
+
+
 class BaseBooleanReduceTests(BaseReduceTests):
-    @pytest.mark.parametrize("skipna", [True, False])
-    def test_reduce_series(self, data, all_boolean_reductions, skipna):
-        op_name = all_boolean_reductions
-        s = pd.Series(data)
-        self.check_reduce(s, op_name, skipna)
+    # For backward compatibility only, this only runs the boolean reductions
+    def _supports_reduction(self, obj, op_name: str) -> bool:
+        if op_name not in ["any", "all"]:
+            pytest.skip("These are tested in BaseNumericReduceTests")
+        return True
diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py
index 944ed0dbff66e..b2dd910fd0d2d 100644
--- a/pandas/tests/extension/decimal/test_decimal.py
+++ b/pandas/tests/extension/decimal/test_decimal.py
@@ -146,6 +146,9 @@ def test_fillna_series_method(self, data_missing, fillna_method):
 
 
 class Reduce:
+    def _supports_reduction(self, obj, op_name: str) -> bool:
+        return True
+
     def check_reduce(self, s, op_name, skipna):
         if op_name in ["median", "skew", "kurt", "sem"]:
             msg = r"decimal does not support the .* operation"
@@ -183,7 +186,7 @@ def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
         tm.assert_series_equal(result, expected)
 
 
-class TestNumericReduce(Reduce, base.BaseNumericReduceTests):
+class TestReduce(Reduce, base.BaseReduceTests):
     @pytest.mark.parametrize("skipna", [True, False])
     def test_reduce_frame(self, data, all_numeric_reductions, skipna):
         op_name = all_numeric_reductions
@@ -194,10 +197,6 @@ def test_reduce_frame(self, data, all_numeric_reductions, skipna):
         return super().test_reduce_frame(data, all_numeric_reductions, skipna)
 
 
-class TestBooleanReduce(Reduce, base.BaseBooleanReduceTests):
-    pass
-
-
 class TestMethods(base.BaseMethodsTests):
     def test_fillna_copy_frame(self, data_missing, using_copy_on_write):
         warn = FutureWarning if not using_copy_on_write else None
diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py
index 8a571d9295e1f..fa3314e36c974 100644
--- a/pandas/tests/extension/json/test_json.py
+++ b/pandas/tests/extension/json/test_json.py
@@ -175,7 +175,7 @@ def test_fillna_frame(self):
 unhashable = pytest.mark.xfail(reason="Unhashable")
 
 
-class TestReduce(base.BaseNoReduceTests):
+class TestReduce(base.BaseReduceTests):
     pass
 
 
diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py
index 655ca9cc39c58..eae97fceab2f7 100644
--- a/pandas/tests/extension/test_arrow.py
+++ b/pandas/tests/extension/test_arrow.py
@@ -408,7 +408,10 @@ def test_accumulate_series(self, data, all_numeric_accumulations, skipna, reques
         self.check_accumulate(ser, op_name, skipna)
 
 
-class TestBaseNumericReduce(base.BaseNumericReduceTests):
+class TestReduce(base.BaseReduceTests):
+    def _supports_reduction(self, obj, op_name: str) -> bool:
+        return True
+
     def check_reduce(self, ser, op_name, skipna):
         pa_dtype = ser.dtype.pyarrow_dtype
         if op_name == "count":
@@ -429,7 +432,7 @@ def check_reduce(self, ser, op_name, skipna):
         tm.assert_almost_equal(result, expected)
 
     @pytest.mark.parametrize("skipna", [True, False])
-    def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
+    def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna, request):
         pa_dtype = data.dtype.pyarrow_dtype
         opname = all_numeric_reductions
 
@@ -497,44 +500,10 @@ def test_reduce_series(self, data, all_numeric_reductions, skipna, request):
             "median",
         }:
             request.node.add_marker(xfail_mark)
-        super().test_reduce_series(data, all_numeric_reductions, skipna)
-
-    def _get_expected_reduction_dtype(self, arr, op_name: str):
-        if op_name in ["max", "min"]:
-            cmp_dtype = arr.dtype
-        elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":
-            if op_name not in ["median", "var", "std"]:
-                cmp_dtype = arr.dtype
-            else:
-                cmp_dtype = "float64[pyarrow]"
-        elif op_name in ["median", "var", "std", "mean", "skew"]:
-            cmp_dtype = "float64[pyarrow]"
-        else:
-            cmp_dtype = {
-                "i": "int64[pyarrow]",
-                "u": "uint64[pyarrow]",
-                "f": "float64[pyarrow]",
-            }[arr.dtype.kind]
-        return cmp_dtype
+        super().test_reduce_series_numeric(data, all_numeric_reductions, skipna)
 
     @pytest.mark.parametrize("skipna", [True, False])
-    def test_reduce_frame(self, data, all_numeric_reductions, skipna):
-        op_name = all_numeric_reductions
-        if op_name == "skew":
-            assert not hasattr(data, op_name)
-            return
-        return super().test_reduce_frame(data, all_numeric_reductions, skipna)
-
-    @pytest.mark.parametrize("typ", ["int64", "uint64", "float64"])
-    def test_median_not_approximate(self, typ):
-        # GH 52679
-        result = pd.Series([1, 2], dtype=f"{typ}[pyarrow]").median()
-        assert result == 1.5
-
-
-class TestBaseBooleanReduce(base.BaseBooleanReduceTests):
-    @pytest.mark.parametrize("skipna", [True, False])
-    def test_reduce_series(
+    def test_reduce_series_boolean(
         self, data, all_boolean_reductions, skipna, na_value, request
     ):
         pa_dtype = data.dtype.pyarrow_dtype
@@ -566,6 +535,38 @@ def test_reduce_series(
         result = getattr(ser, op_name)(skipna=skipna)
         assert result is (op_name == "any")
 
+    def _get_expected_reduction_dtype(self, arr, op_name: str):
+        if op_name in ["max", "min"]:
+            cmp_dtype = arr.dtype
+        elif arr.dtype.name == "decimal128(7, 3)[pyarrow]":
+            if op_name not in ["median", "var", "std"]:
+                cmp_dtype = arr.dtype
+            else:
+                cmp_dtype = "float64[pyarrow]"
+        elif op_name in ["median", "var", "std", "mean", "skew"]:
+            cmp_dtype = "float64[pyarrow]"
+        else:
+            cmp_dtype = {
+                "i": "int64[pyarrow]",
+                "u": "uint64[pyarrow]",
+                "f": "float64[pyarrow]",
+            }[arr.dtype.kind]
+        return cmp_dtype
+
+    @pytest.mark.parametrize("skipna", [True, False])
+    def test_reduce_frame(self, data, all_numeric_reductions, skipna):
+        op_name = all_numeric_reductions
+        if op_name == "skew":
+            assert not hasattr(data, op_name)
+            return
+        return super().test_reduce_frame(data, all_numeric_reductions, skipna)
+
+    @pytest.mark.parametrize("typ", ["int64", "uint64", "float64"])
+    def test_median_not_approximate(self, typ):
+        # GH 52679
+        result = pd.Series([1, 2], dtype=f"{typ}[pyarrow]").median()
+        assert result == 1.5
+
 
 class TestBaseGroupby(base.BaseGroupbyTests):
     def test_in_numeric_groupby(self, data_for_grouping):
diff --git a/pandas/tests/extension/test_boolean.py b/pandas/tests/extension/test_boolean.py
index e5f6da5371742..229f1f4ec028c 100644
--- a/pandas/tests/extension/test_boolean.py
+++ b/pandas/tests/extension/test_boolean.py
@@ -220,7 +220,10 @@ def test_groupby_sum_mincount(self, data_for_grouping, min_count):
         tm.assert_frame_equal(result, expected)
 
 
-class TestNumericReduce(base.BaseNumericReduceTests):
+class TestReduce(base.BaseReduceTests):
+    def _supports_reduction(self, obj, op_name: str) -> bool:
+        return True
+
     def check_reduce(self, s, op_name, skipna):
         if op_name == "count":
             result = getattr(s, op_name)()
@@ -248,10 +251,6 @@ def _get_expected_reduction_dtype(self, arr, op_name: str):
         return cmp_dtype
 
 
-class TestBooleanReduce(base.BaseBooleanReduceTests):
-    pass
-
-
 class TestPrinting(base.BasePrintingTests):
     pass
 
diff --git a/pandas/tests/extension/test_categorical.py b/pandas/tests/extension/test_categorical.py
index fc4dfe3af3bca..e24d29ea53908 100644
--- a/pandas/tests/extension/test_categorical.py
+++ b/pandas/tests/extension/test_categorical.py
@@ -152,7 +152,7 @@ class TestMissing(base.BaseMissingTests):
     pass
 
 
-class TestReduce(base.BaseNoReduceTests):
+class TestReduce(base.BaseReduceTests):
     pass
 
 
diff --git a/pandas/tests/extension/test_interval.py b/pandas/tests/extension/test_interval.py
index 4ef303289ee5c..b4870d2f1fe2f 100644
--- a/pandas/tests/extension/test_interval.py
+++ b/pandas/tests/extension/test_interval.py
@@ -105,7 +105,7 @@ class TestInterface(BaseInterval, base.BaseInterfaceTests):
     pass
 
 
-class TestReduce(base.BaseNoReduceTests):
+class TestReduce(base.BaseReduceTests):
     @pytest.mark.parametrize("skipna", [True, False])
     def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
         op_name = all_numeric_reductions
diff --git a/pandas/tests/extension/test_masked_numeric.py b/pandas/tests/extension/test_masked_numeric.py
index b171797dd6359..cb3792d37831d 100644
--- a/pandas/tests/extension/test_masked_numeric.py
+++ b/pandas/tests/extension/test_masked_numeric.py
@@ -227,7 +227,12 @@ class TestGroupby(base.BaseGroupbyTests):
     pass
 
 
-class TestNumericReduce(base.BaseNumericReduceTests):
+class TestReduce(base.BaseReduceTests):
+    def _supports_reduction(self, obj, op_name: str) -> bool:
+        if op_name in ["any", "all"]:
+            pytest.skip(reason="Tested in tests/reductions/test_reductions.py")
+        return True
+
     def check_reduce(self, ser: pd.Series, op_name: str, skipna: bool):
         # overwrite to ensure pd.NA is tested instead of np.nan
         # https://github.com/pandas-dev/pandas/issues/30958
@@ -266,11 +271,6 @@ def _get_expected_reduction_dtype(self, arr, op_name: str):
         return cmp_dtype
 
 
-@pytest.mark.skip(reason="Tested in tests/reductions/test_reductions.py")
-class TestBooleanReduce(base.BaseBooleanReduceTests):
-    pass
-
-
 class TestAccumulation(base.BaseAccumulateTests):
     def _supports_accumulation(self, ser: pd.Series, op_name: str) -> bool:
         return True
diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py
index db191954c8d59..14f98e4115e4e 100644
--- a/pandas/tests/extension/test_numpy.py
+++ b/pandas/tests/extension/test_numpy.py
@@ -303,11 +303,22 @@ class TestPrinting(BaseNumPyTests, base.BasePrintingTests):
     pass
 
 
-class TestNumericReduce(BaseNumPyTests, base.BaseNumericReduceTests):
+class TestReduce(BaseNumPyTests, base.BaseReduceTests):
+    def _supports_reduction(self, obj, op_name: str) -> bool:
+        if tm.get_dtype(obj).kind == "O":
+            return op_name in ["sum", "min", "max", "any", "all"]
+        return True
+
     def check_reduce(self, s, op_name, skipna):
-        result = getattr(s, op_name)(skipna=skipna)
+        res_op = getattr(s, op_name)
         # avoid coercing int -> float. Just cast to the actual numpy type.
-        expected = getattr(s.astype(s.dtype._dtype), op_name)(skipna=skipna)
+        exp_op = getattr(s.astype(s.dtype._dtype), op_name)
+        if op_name == "count":
+            result = res_op()
+            expected = exp_op()
+        else:
+            result = res_op(skipna=skipna)
+            expected = exp_op(skipna=skipna)
         tm.assert_almost_equal(result, expected)
 
     @pytest.mark.skip("tests not written yet")
@@ -315,15 +326,6 @@ def check_reduce(self, s, op_name, skipna):
     def test_reduce_frame(self, data, all_numeric_reductions, skipna):
         pass
 
-    @pytest.mark.parametrize("skipna", [True, False])
-    def test_reduce_series(self, data, all_boolean_reductions, skipna):
-        super().test_reduce_series(data, all_boolean_reductions, skipna)
-
-
-@skip_nested
-class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests):
-    pass
-
 
 class TestMissing(BaseNumPyTests, base.BaseMissingTests):
     @skip_nested
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index d42d79da17f4e..7256ea5837bbf 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -168,7 +168,7 @@ def test_fillna_no_op_returns_copy(self, data):
         tm.assert_extension_array_equal(result, data)
 
 
-class TestNoReduce(base.BaseNoReduceTests):
+class TestReduce(base.BaseReduceTests):
     @pytest.mark.parametrize("skipna", [True, False])
     def test_reduce_series_numeric(self, data, all_numeric_reductions, skipna):
         op_name = all_numeric_reductions