Skip to content

Commit 14affe0

Browse files
authored
PERF: Groupby aggregations with Categorical (#52120)
1 parent 57c4940 commit 14affe0

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

pandas/core/groupby/ops.py

+11 -5
Original file line number | Diff line number | Diff line change
@@ -257,7 +257,7 @@ def _disallow_invalid_ops(self, dtype: DtypeObj, is_numeric: bool = False):
257257
# don't go down a group-by-group path, since in the empty-groups
258258
# case that would fail to raise
259259
raise TypeError(f"Cannot perform {how} with non-ordered Categorical")
260-
if how not in ["rank", "any", "all"]:
260+
if how not in ["rank", "any", "all", "first", "last", "min", "max"]:
261261
# only "rank" is implemented in cython
262262
raise NotImplementedError(f"{dtype} dtype not supported")
263263

@@ -356,11 +356,17 @@ def _ea_wrap_cython_operation(
356356
)
357357

358358
elif isinstance(values, Categorical):
359-
assert self.how in ["rank", "any", "all"]
359+
assert self.how in ["rank", "any", "all", "first", "last", "min", "max"]
360360
mask = values.isna()
361361
if self.how == "rank":
362362
assert values.ordered # checked earlier
363363
npvalues = values._ndarray
364+
elif self.how in ["first", "last", "min", "max"]:
365+
if self.how in ["min", "max"]:
366+
assert values.ordered # checked earlier
367+
npvalues = values._ndarray
368+
result_mask = np.zeros(ngroups, dtype=np.uint8)
369+
kwargs["result_mask"] = result_mask
364370
else:
365371
npvalues = values.astype(bool)
366372

@@ -373,9 +379,9 @@ def _ea_wrap_cython_operation(
373379
**kwargs,
374380
)
375381

376-
# If we ever have more than just "rank" here, we'll need to do
377-
# `if self.how in self.cast_blocklist` like we do for other dtypes.
378-
return res_values
382+
if self.how in self.cast_blocklist:
383+
return res_values
384+
return values._from_backing_data(res_values)
379385

380386
npvalues = self._ea_to_cython_values(values)
381387

0 commit comments

Comments
 (0)