We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 2aadaa8, commit 396131a (full SHA: 396131a)
doc/source/whatsnew/v1.2.1.rst
@@ -28,6 +28,7 @@ Fixed regressions
28
- Fixed regression in :meth:`DataFrame.replace` raising ``ValueError`` when :class:`DataFrame` has dtype ``bytes`` (:issue:`38900`)
29
- Fixed regression in :meth:`DataFrameGroupBy.diff` raising for ``int8`` and ``int16`` columns (:issue:`39050`)
30
- Fixed regression that raised ``AttributeError`` with PyArrow versions [0.16.0, 1.0.0) (:issue:`38801`)
31
+- Fixed regression in :meth:`DataFrame.groupby` when aggregating an :class:`ExtensionDtype` that could fail for non-numeric values (:issue:`38980`)
32
-
33
34
pandas/core/groupby/ops.py
@@ -540,7 +540,9 @@ def _ea_wrap_cython_operation(
540
result = type(orig_values)._from_sequence(res_values)
541
return result
542
543
- raise NotImplementedError(values.dtype)
+ raise NotImplementedError(
544
+ f"function is not implemented for this dtype: {values.dtype}"
545
+ )
546
547
@final
548
def _cython_operation(
pandas/tests/extension/base/groupby.py
@@ -33,6 +33,22 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping):
expected = expected.reset_index()
self.assert_frame_equal(result, expected)
35
36
+ def test_groupby_agg_extension(self, data_for_grouping):
37
+ # GH#38980 groupby agg on extension type fails for non-numeric types
38
+ df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
39
+
40
+ expected = df.iloc[[0, 2, 4, 7]]
41
+ expected = expected.set_index("A")
42
43
+ result = df.groupby("A").agg({"B": "first"})
44
+ self.assert_frame_equal(result, expected)
45
46
+ result = df.groupby("A").agg("first")
47
48
49
+ result = df.groupby("A").first()
50
51
52
def test_groupby_extension_no_sort(self, data_for_grouping):
53
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
54
result = df.groupby("B", sort=False).A.mean()
pandas/tests/extension/decimal/test_decimal.py
@@ -197,6 +197,10 @@ class TestGroupby(BaseDecimal, base.BaseGroupbyTests):
197
def test_groupby_apply_identity(self, data_for_grouping):
198
super().test_groupby_apply_identity(data_for_grouping)
199
200
+ @pytest.mark.xfail(reason="GH#39098: Converts agg result to object")
201
202
+ super().test_groupby_agg_extension(data_for_grouping)
203
204
205
class TestSetitem(BaseDecimal, base.BaseSetitemTests):
206
pass
pandas/tests/extension/json/test_json.py
@@ -313,6 +313,10 @@ def test_groupby_extension_apply(self):
313
def test_groupby_extension_agg(self, as_index, data_for_grouping):
314
super().test_groupby_extension_agg(as_index, data_for_grouping)
315
316
317
318
319
320
321
class TestArithmeticOps(BaseJSON, base.BaseArithmeticOpsTests):
322
def test_error(self, data, all_arithmetic_operators):
pandas/tests/extension/test_boolean.py
@@ -291,6 +291,22 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping):
291
292
293
294
295
296
+ df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping})
297
298
+ expected = df.iloc[[0, 2, 4]]
299
300
301
302
303
304
305
306
307
308
309
310
311
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping})
312
0 commit comments