diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 7ad7e8f5a27b0..f8199524abbaf 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -571,6 +571,7 @@ Numeric - Bug in :meth:`DataFrame.mean` with ``numeric_only=False`` and either ``datetime64`` dtype or ``PeriodDtype`` column incorrectly raising ``TypeError`` (:issue:`32426`) - Bug in :meth:`DataFrame.count` with ``level="foo"`` and index level ``"foo"`` containing NaNs causes segmentation fault (:issue:`21824`) - Bug in :meth:`DataFrame.diff` with ``axis=1`` returning incorrect results with mixed dtypes (:issue:`32995`) +- Bug in :class:`DataFrame` reductions with ``numeric_only=True`` and ExtensionArray columns (:issue:`33256`) - Bug in :meth:`DataFrame.corr` and :meth:`DataFrame.cov` raising when handling nullable integer columns with ``pandas.NA`` (:issue:`33803`) - Bug in :class:`DataFrame` and :class:`Series` addition and subtraction between object-dtype objects and ``datetime64`` dtype objects (:issue:`33824`) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f8cb99e2b2e75..3d563f48d32c9 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8325,10 +8325,10 @@ def _get_data(axis_matters): out_dtype = "bool" if filter_type == "bool" else None def blk_func(values): - if values.ndim == 1 and not isinstance(values, np.ndarray): - # we can't pass axis=1 - return op(values, axis=0, skipna=skipna, **kwds) - return op(values, axis=1, skipna=skipna, **kwds) + if isinstance(values, ExtensionArray): + return values._reduce(name, skipna=skipna, **kwds) + else: + return op(values, axis=1, skipna=skipna, **kwds) # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager._reduce diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 75afc59382a75..f69c85c070ca4 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -896,9 
+896,17 @@ def test_mean_datetimelike_numeric_only_false(self): # mean of period is not allowed df["D"] = pd.period_range("2016", periods=3, freq="A") - with pytest.raises(TypeError, match="reduction operation 'mean' not allowed"): + with pytest.raises(TypeError, match="mean is not implemented for Period"): df.mean(numeric_only=False) + def test_mean_extensionarray_numeric_only_true(self): + # https://github.com/pandas-dev/pandas/issues/33256 + arr = np.random.randint(1000, size=(10, 5)) + df = pd.DataFrame(arr, dtype="Int64") + result = df.mean(numeric_only=True) + expected = pd.DataFrame(arr).mean() + tm.assert_series_equal(result, expected) + def test_stats_mixed_type(self, float_string_frame): # don't blow up float_string_frame.std(1)