Skip to content

Commit 396131a

Browse files
authored
REGR: Bug fix for ExtensionArray groupby aggregation on non-numeric types (#38982)
1 parent 2aadaa8 commit 396131a

File tree

6 files changed

+44
-1
lines changed

6 files changed

+44
-1
lines changed

doc/source/whatsnew/v1.2.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ Fixed regressions
2828
- Fixed regression in :meth:`DataFrame.replace` raising ``ValueError`` when :class:`DataFrame` has dtype ``bytes`` (:issue:`38900`)
2929
- Fixed regression in :meth:`DataFrameGroupBy.diff` raising for ``int8`` and ``int16`` columns (:issue:`39050`)
3030
- Fixed regression that raised ``AttributeError`` with PyArrow versions [0.16.0, 1.0.0) (:issue:`38801`)
31+
- Fixed regression in :meth:`DataFrame.groupby` when aggregating an :class:`ExtensionDtype` that could fail for non-numeric values (:issue:`38980`)
3132
-
3233
-
3334

pandas/core/groupby/ops.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -540,7 +540,9 @@ def _ea_wrap_cython_operation(
540540
result = type(orig_values)._from_sequence(res_values)
541541
return result
542542

543-
raise NotImplementedError(values.dtype)
543+
raise NotImplementedError(
544+
f"function is not implemented for this dtype: {values.dtype}"
545+
)
544546

545547
@final
546548
def _cython_operation(

pandas/tests/extension/base/groupby.py

+16
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,22 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping):
3333
expected = expected.reset_index()
3434
self.assert_frame_equal(result, expected)
3535

36+
def test_groupby_agg_extension(self, data_for_grouping):
37+
# GH#38980 groupby agg on extension type fails for non-numeric types
38+
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
39+
40+
expected = df.iloc[[0, 2, 4, 7]]
41+
expected = expected.set_index("A")
42+
43+
result = df.groupby("A").agg({"B": "first"})
44+
self.assert_frame_equal(result, expected)
45+
46+
result = df.groupby("A").agg("first")
47+
self.assert_frame_equal(result, expected)
48+
49+
result = df.groupby("A").first()
50+
self.assert_frame_equal(result, expected)
51+
3652
def test_groupby_extension_no_sort(self, data_for_grouping):
3753
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
3854
result = df.groupby("B", sort=False).A.mean()

pandas/tests/extension/decimal/test_decimal.py

+4
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,10 @@ class TestGroupby(BaseDecimal, base.BaseGroupbyTests):
197197
def test_groupby_apply_identity(self, data_for_grouping):
198198
super().test_groupby_apply_identity(data_for_grouping)
199199

200+
@pytest.mark.xfail(reason="GH#39098: Converts agg result to object")
201+
def test_groupby_agg_extension(self, data_for_grouping):
202+
super().test_groupby_agg_extension(data_for_grouping)
203+
200204

201205
class TestSetitem(BaseDecimal, base.BaseSetitemTests):
202206
pass

pandas/tests/extension/json/test_json.py

+4
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,10 @@ def test_groupby_extension_apply(self):
313313
def test_groupby_extension_agg(self, as_index, data_for_grouping):
314314
super().test_groupby_extension_agg(as_index, data_for_grouping)
315315

316+
@pytest.mark.xfail(reason="GH#39098: Converts agg result to object")
317+
def test_groupby_agg_extension(self, data_for_grouping):
318+
super().test_groupby_agg_extension(data_for_grouping)
319+
316320

317321
class TestArithmeticOps(BaseJSON, base.BaseArithmeticOpsTests):
318322
def test_error(self, data, all_arithmetic_operators):

pandas/tests/extension/test_boolean.py

+16
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,22 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping):
291291
expected = expected.reset_index()
292292
self.assert_frame_equal(result, expected)
293293

294+
def test_groupby_agg_extension(self, data_for_grouping):
295+
# GH#38980 groupby agg on extension type fails for non-numeric types
296+
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping})
297+
298+
expected = df.iloc[[0, 2, 4]]
299+
expected = expected.set_index("A")
300+
301+
result = df.groupby("A").agg({"B": "first"})
302+
self.assert_frame_equal(result, expected)
303+
304+
result = df.groupby("A").agg("first")
305+
self.assert_frame_equal(result, expected)
306+
307+
result = df.groupby("A").first()
308+
self.assert_frame_equal(result, expected)
309+
294310
def test_groupby_extension_no_sort(self, data_for_grouping):
295311
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping})
296312
result = df.groupby("B", sort=False).A.mean()

0 commit comments

Comments
 (0)