From b620044300160944756c8d114636a4dac2552b7e Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 1 May 2020 20:19:11 +0200 Subject: [PATCH] Backport PR #33761 on branch 1.0.x (REGR: fix DataFrame reduction with EA columns and numeric_only=True) --- doc/source/whatsnew/v1.0.4.rst | 1 + pandas/core/frame.py | 8 +++++++- pandas/tests/frame/test_analytics.py | 14 ++++++++++++++ 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.4.rst b/doc/source/whatsnew/v1.0.4.rst index ed4a7ffc44441..8eb6b32669d60 100644 --- a/doc/source/whatsnew/v1.0.4.rst +++ b/doc/source/whatsnew/v1.0.4.rst @@ -16,6 +16,7 @@ including other versions of pandas. Fixed regressions ~~~~~~~~~~~~~~~~~ - Bug in :meth:`GroupBy.first` and :meth:`GroupBy.last` where None is not preserved in object dtype (:issue:`32800`) +- Bug in DataFrame reductions using ``numeric_only=True`` and ExtensionArrays (:issue:`33256`). - .. _whatsnew_104.bug_fixes: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 819d341d2d5b4..94f70f7ea2165 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7891,9 +7891,15 @@ def _get_data(axis_matters): out_dtype = "bool" if filter_type == "bool" else None + def blk_func(values): + if isinstance(values, ExtensionArray): + return values._reduce(name, skipna=skipna, **kwds) + else: + return op(values, axis=1, skipna=skipna, **kwds) + # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager._reduce - res = df._data.reduce(op, axis=1, skipna=skipna, **kwds) + res = df._data.reduce(blk_func) assert isinstance(res, dict) if len(res): assert len(res) == max(list(res.keys())) + 1, res.keys() diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 25b2997eb088f..8e1c3effb6cfc 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -891,6 +891,20 @@ def test_mean_datetimelike_numeric_only_false(self): ) tm.assert_series_equal(result, expected) + # mean of period is not allowed + df["D"] = pd.period_range("2016", periods=3, freq="A") + + with pytest.raises(TypeError, match="mean is not implemented for Period"): + df.mean(numeric_only=False) + + def test_mean_extensionarray_numeric_only_true(self): + # https://github.com/pandas-dev/pandas/issues/33256 + arr = np.random.randint(1000, size=(10, 5)) + df = pd.DataFrame(arr, dtype="Int64") + result = df.mean(numeric_only=True) + expected = pd.DataFrame(arr).mean() + tm.assert_series_equal(result, expected) + def test_stats_mixed_type(self, float_string_frame): # don't blow up float_string_frame.std(1)