Skip to content

Commit a73828f

Browse files
committed
Fix groupby of categoricals with just "max"
I discovered pandas-dev/pandas#28641 while testing ... and worked around it by always adding a dummy "size" aggregation for category columns.
1 parent 6421b0b commit a73828f

File tree

2 files changed

+24
-7
lines changed

2 files changed

+24
-7
lines changed

groupby.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -302,14 +302,16 @@ def groupby(
302302
and hasattr(table[colname], "cat")
303303
}
304304
for colname in category_colnames:
305-
table[colname] = table[colname].cat.as_ordered()
305+
table[colname].cat.as_ordered(inplace=True)
306+
# Add dummy "size" to work around
307+
# https://github.com/pandas-dev/pandas/issues/28641
308+
agg_sets[colname].add("size")
306309

307310
if group_specs:
308311
# aggs: DataFrame indexed by group
309312
# out: just the group colnames, no values yet (we'll add them later)
310-
grouped = table.groupby(group_specs)
311-
if agg_sets:
312-
aggs = grouped.agg(agg_sets)
313+
grouped = table.groupby(group_specs, as_index=True)
314+
aggs = grouped.agg(agg_sets)
313315
out = aggs.index.to_frame(index=False)
314316
# Remove unused categories (because `np.nan` deletes categories)
315317
for column in out:
@@ -320,8 +322,7 @@ def groupby(
320322
# aggs: DataFrame with just one row
321323
# out: one empty row, no columns yet
322324
grouped = table
323-
if agg_sets:
324-
aggs = table.agg(agg_sets)
325+
aggs = table.agg(agg_sets)
325326
out = pd.DataFrame(columns=[], index=[0])
326327

327328
# Now copy values from `aggs` into `out`. (They have the same index.)
@@ -348,7 +349,7 @@ def groupby(
348349

349350
# Remember those category colnames we converted to ordered? Now we need to
350351
# undo that (and remove newly-unused categories).
351-
for colname in out.columns:
352+
for colname in list(out.columns):
352353
column = out[colname]
353354
if hasattr(column, "cat") and column.cat.ordered:
354355
column.cat.remove_unused_categories(inplace=True)

test_groupby.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -395,6 +395,22 @@ def test_aggregate_text_category_values(self):
395395
),
396396
)
397397

398+
def test_aggregate_text_category_values_max(self):
399+
# https://github.com/pandas-dev/pandas/issues/28641
400+
result = groupby(
401+
pd.DataFrame(
402+
{"A": [1997], "B": pd.Series(["30-SEP-97"], dtype="category")}
403+
),
404+
[Group("A", None)],
405+
[Aggregation(Operation.MAX, "B", "X")],
406+
)
407+
assert_frame_equal(
408+
result,
409+
pd.DataFrame(
410+
{"A": [1997], "X": pd.Series(["30-SEP-97"], dtype="category")}
411+
),
412+
)
413+
398414
def test_aggregate_text_category_values_empty_still_has_object_dtype(self):
399415
result = groupby(
400416
pd.DataFrame({"A": [None]}, dtype=str).astype("category"),

0 commit comments

Comments
 (0)