Skip to content

Commit 7b09793

Browse files
Backport PR #33761 on branch 1.0.x (REGR: fix DataFrame reduction with EA columns and numeric_only=True) (#34000)
Co-authored-by: Joris Van den Bossche <[email protected]>
1 parent 23288e4 commit 7b09793

File tree

3 files changed

+22
-1
lines changed

3 files changed

+22
-1
lines changed

doc/source/whatsnew/v1.0.4.rst

+1
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ including other versions of pandas.
1616
Fixed regressions
1717
~~~~~~~~~~~~~~~~~
1818
- Bug in :meth:`GroupBy.first` and :meth:`GroupBy.last` where None is not preserved in object dtype (:issue:`32800`)
19+
- Bug in DataFrame reductions using ``numeric_only=True`` and ExtensionArrays (:issue:`33256`).
1920
-
2021

2122
.. _whatsnew_104.bug_fixes:

pandas/core/frame.py

+7-1
Original file line number | Diff line number | Diff line change
@@ -7891,9 +7891,15 @@ def _get_data(axis_matters):
78917891

78927892
out_dtype = "bool" if filter_type == "bool" else None
78937893

7894+
def blk_func(values):
7895+
if isinstance(values, ExtensionArray):
7896+
return values._reduce(name, skipna=skipna, **kwds)
7897+
else:
7898+
return op(values, axis=1, skipna=skipna, **kwds)
7899+
78947900
# After possibly _get_data and transposing, we are now in the
78957901
# simple case where we can use BlockManager._reduce
7896-
res = df._data.reduce(op, axis=1, skipna=skipna, **kwds)
7902+
res = df._data.reduce(blk_func)
78977903
assert isinstance(res, dict)
78987904
if len(res):
78997905
assert len(res) == max(list(res.keys())) + 1, res.keys()

pandas/tests/frame/test_analytics.py

+14
Original file line number | Diff line number | Diff line change
@@ -891,6 +891,20 @@ def test_mean_datetimelike_numeric_only_false(self):
891891
)
892892
tm.assert_series_equal(result, expected)
893893

894+
# mean of period is not allowed
895+
df["D"] = pd.period_range("2016", periods=3, freq="A")
896+
897+
with pytest.raises(TypeError, match="mean is not implemented for Period"):
898+
df.mean(numeric_only=False)
899+
900+
def test_mean_extensionarray_numeric_only_true(self):
901+
# https://github.com/pandas-dev/pandas/issues/33256
902+
arr = np.random.randint(1000, size=(10, 5))
903+
df = pd.DataFrame(arr, dtype="Int64")
904+
result = df.mean(numeric_only=True)
905+
expected = pd.DataFrame(arr).mean()
906+
tm.assert_series_equal(result, expected)
907+
894908
def test_stats_mixed_type(self, float_string_frame):
895909
# don't blow up
896910
float_string_frame.std(1)

0 commit comments

Comments
 (0)