Skip to content

Backport PR #38982 on branch 1.2.x (REGR: Bug fix for ExtensionArray groupby aggregation on non-numeric types) #39145

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.2.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ Fixed regressions
- Fixed regression in :meth:`DataFrame.replace` raising ``ValueError`` when :class:`DataFrame` has dtype ``bytes`` (:issue:`38900`)
- Fixed regression in :meth:`DataFrameGroupBy.diff` raising for ``int8`` and ``int16`` columns (:issue:`39050`)
- Fixed regression that raised ``AttributeError`` with PyArrow versions [0.16.0, 1.0.0) (:issue:`38801`)
- Fixed regression in :meth:`DataFrame.groupby` where aggregating a column with an :class:`ExtensionDtype` could fail for non-numeric values (:issue:`38980`)
-
-

Expand Down
4 changes: 3 additions & 1 deletion pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,9 @@ def _ea_wrap_cython_operation(
result = type(orig_values)._from_sequence(res_values)
return result

raise NotImplementedError(values.dtype)
raise NotImplementedError(
f"function is not implemented for this dtype: {values.dtype}"
)

@final
def _cython_operation(
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/extension/base/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,22 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping):
expected = expected.reset_index()
self.assert_frame_equal(result, expected)

def test_groupby_agg_extension(self, data_for_grouping):
    # GH#38980: regression test for groupby aggregation on extension types
    # failing for non-numeric types. Every spelling of "first" below must
    # produce the same frame.
    frame = pd.DataFrame(
        {"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}
    )

    # Rows 0, 2, 4 and 7 are the first row of each distinct key in "A".
    expected = frame.iloc[[0, 2, 4, 7]].set_index("A")

    aggregations = (
        lambda grouped: grouped.agg({"B": "first"}),
        lambda grouped: grouped.agg("first"),
        lambda grouped: grouped.first(),
    )
    for aggregate in aggregations:
        # Build a fresh groupby per spelling so the checks stay independent.
        result = aggregate(frame.groupby("A"))
        self.assert_frame_equal(result, expected)

def test_groupby_extension_no_sort(self, data_for_grouping):
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping})
result = df.groupby("B", sort=False).A.mean()
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/extension/decimal/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,10 @@ class TestGroupby(BaseDecimal, base.BaseGroupbyTests):
def test_groupby_apply_identity(self, data_for_grouping):
super().test_groupby_apply_identity(data_for_grouping)

@pytest.mark.xfail(reason="GH#39098: Converts agg result to object")
def test_groupby_agg_extension(self, data_for_grouping):
# Marked as an expected failure: per the xfail reason (GH#39098) the
# aggregated result is converted to object. The override only exists to
# attach the marker; it delegates unchanged to the inherited test.
super().test_groupby_agg_extension(data_for_grouping)


class TestSetitem(BaseDecimal, base.BaseSetitemTests):
pass
Expand Down
4 changes: 4 additions & 0 deletions pandas/tests/extension/json/test_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,10 @@ def test_groupby_extension_apply(self):
def test_groupby_extension_agg(self, as_index, data_for_grouping):
super().test_groupby_extension_agg(as_index, data_for_grouping)

@pytest.mark.xfail(reason="GH#39098: Converts agg result to object")
def test_groupby_agg_extension(self, data_for_grouping):
# Marked as an expected failure: per the xfail reason (GH#39098) the
# aggregated result is converted to object. The override only exists to
# attach the marker; it delegates unchanged to the inherited test.
super().test_groupby_agg_extension(data_for_grouping)


class TestArithmeticOps(BaseJSON, base.BaseArithmeticOpsTests):
def test_error(self, data, all_arithmetic_operators):
Expand Down
16 changes: 16 additions & 0 deletions pandas/tests/extension/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,22 @@ def test_groupby_extension_agg(self, as_index, data_for_grouping):
expected = expected.reset_index()
self.assert_frame_equal(result, expected)

def test_groupby_agg_extension(self, data_for_grouping):
    # GH#38980: regression test for groupby aggregation on extension types
    # failing for non-numeric types. Every spelling of "first" below must
    # produce the same frame.
    frame = pd.DataFrame(
        {"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping}
    )

    # Rows 0, 2 and 4 are the first row of each distinct key in "A".
    expected = frame.iloc[[0, 2, 4]].set_index("A")

    aggregations = (
        lambda grouped: grouped.agg({"B": "first"}),
        lambda grouped: grouped.agg("first"),
        lambda grouped: grouped.first(),
    )
    for aggregate in aggregations:
        # Build a fresh groupby per spelling so the checks stay independent.
        result = aggregate(frame.groupby("A"))
        self.assert_frame_equal(result, expected)

def test_groupby_extension_no_sort(self, data_for_grouping):
df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1], "B": data_for_grouping})
result = df.groupby("B", sort=False).A.mean()
Expand Down