From 41e801ce7bcc1af6ec8e6052610e19adc6863cc0 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 24 Apr 2020 09:56:06 +0200 Subject: [PATCH 1/3] REGR: fix DataFrame reduction with EA columns and numeric_only=True --- pandas/core/frame.py | 8 ++++---- pandas/tests/frame/test_analytics.py | 8 ++++++++ 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2d2f7bbf7092f..94ac29f2f39b5 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -8281,10 +8281,10 @@ def _get_data(axis_matters): out_dtype = "bool" if filter_type == "bool" else None def blk_func(values): - if values.ndim == 1 and not isinstance(values, np.ndarray): - # we can't pass axis=1 - return op(values, axis=0, skipna=skipna, **kwds) - return op(values, axis=1, skipna=skipna, **kwds) + if isinstance(values, ExtensionArray): + return values._reduce(name, skipna=skipna, **kwds) + else: + return op(values, axis=1, skipna=skipna, **kwds) # After possibly _get_data and transposing, we are now in the # simple case where we can use BlockManager._reduce diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 0255759513e28..d749946a75364 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -895,6 +895,14 @@ def test_mean_datetimelike_numeric_only_false(self): ) tm.assert_series_equal(result, expected) + def test_mean_extensionarray_numeric_only_true(self): + # https://github.com/pandas-dev/pandas/issues/33256 + arr = np.random.randint(1000, size=(10, 5)) + df = pd.DataFrame(arr, dtype="Int64") + result = df.mean(numeric_only=True) + expected = pd.DataFrame(arr).mean() + tm.assert_series_equal(result, expected) + def test_stats_mixed_type(self, float_string_frame): # don't blow up float_string_frame.std(1) From 879261b01d897d7504a1eb3a845c0ec589d3dfa3 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 24 Apr 2020 11:44:36 +0200 Subject: [PATCH 2/3] add whatsnew --- doc/source/whatsnew/v1.1.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index cd1cb0b64f74a..6348be9e636fd 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -487,6 +487,7 @@ Numeric - Bug in :meth:`DataFrame.mean` with ``numeric_only=False`` and either ``datetime64`` dtype or ``PeriodDtype`` column incorrectly raising ``TypeError`` (:issue:`32426`) - Bug in :meth:`DataFrame.count` with ``level="foo"`` and index level ``"foo"`` containing NaNs causes segmentation fault (:issue:`21824`) - Bug in :meth:`DataFrame.diff` with ``axis=1`` returning incorrect results with mixed dtypes (:issue:`32995`) +- Bug in DataFrame reductions using ``numeric_only=True`` and ExtensionArrays (:issue:`33256`). - Conversion From e3b9b455389dbe1821b51df5c6d1e4578a89eae6 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 1 May 2020 13:46:33 +0200 Subject: [PATCH 3/3] fix assert error message --- pandas/tests/frame/test_analytics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_analytics.py b/pandas/tests/frame/test_analytics.py index 904faa2d37f32..f69c85c070ca4 100644 --- a/pandas/tests/frame/test_analytics.py +++ b/pandas/tests/frame/test_analytics.py @@ -896,7 +896,7 @@ def test_mean_datetimelike_numeric_only_false(self): # mean of period is not allowed df["D"] = pd.period_range("2016", periods=3, freq="A") - with pytest.raises(TypeError, match="reduction operation 'mean' not allowed"): + with pytest.raises(TypeError, match="mean is not implemented for Period"): df.mean(numeric_only=False) def test_mean_extensionarray_numeric_only_true(self):