Skip to content

Commit c3e32d7

Browse files
authored
REGR: Fix TypeError in groupby min / max of period column (#31477)
1 parent 79633f9 commit c3e32d7

File tree

3 files changed

+36
-1
lines changed

3 files changed

+36
-1
lines changed

doc/source/whatsnew/v1.0.1.rst

+1
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ Fixed regressions
2020
- Fixed regression in ``DataFrame.__setitem__`` raising an ``AttributeError`` with a :class:`MultiIndex` and a non-monotonic indexer (:issue:`31449`)
2121
- Fixed regression in :class:`Series` multiplication when multiplying a numeric :class:`Series` with >10000 elements with a timedelta-like scalar (:issue:`31457`)
2222
- Fixed regression in :meth:`GroupBy.apply` if called with a function which returned a non-pandas non-scalar object (e.g. a list or numpy array) (:issue:`31441`)
23+
- Fixed regression in :meth:`DataFrame.groupby` whereby taking the minimum or maximum of a column with period dtype would raise a ``TypeError`` (:issue:`31471`)
2324
- Fixed regression in :meth:`to_datetime` when parsing non-nanosecond resolution datetimes (:issue:`31491`)
2425
- Fixed regression in :meth:`~DataFrame.to_csv` where specifying an ``na_rep`` might truncate the values written (:issue:`31447`)
2526
- Fixed regression in :class:`Categorical` construction with ``numpy.str_`` categories (:issue:`31499`)

pandas/core/groupby/ops.py

+7-1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
is_extension_array_dtype,
3232
is_integer_dtype,
3333
is_numeric_dtype,
34+
is_period_dtype,
3435
is_sparse,
3536
is_timedelta64_dtype,
3637
needs_i8_conversion,
@@ -567,7 +568,12 @@ def _cython_operation(
567568
if swapped:
568569
result = result.swapaxes(0, axis)
569570

570-
if is_datetime64tz_dtype(orig_values.dtype):
571+
if is_datetime64tz_dtype(orig_values.dtype) or is_period_dtype(
572+
orig_values.dtype
573+
):
574+
# We need to use the constructors directly for these dtypes
575+
# since numpy won't recognize them
576+
# https://github.com/pandas-dev/pandas/issues/31471
571577
result = type(orig_values)(result.astype(np.int64), dtype=orig_values.dtype)
572578
elif is_datetimelike and kind == "aggregate":
573579
result = result.astype(orig_values.dtype)

pandas/tests/groupby/aggregate/test_aggregate.py

+28
Original file line numberDiff line numberDiff line change
@@ -684,6 +684,34 @@ def aggfunc(x):
684684
tm.assert_frame_equal(result, expected)
685685

686686

687+
@pytest.mark.parametrize("func", ["min", "max"])
def test_groupby_aggregate_period_column(func):
    """Check that SeriesGroupBy min/max works on a period-dtype column.

    Regression test for GH 31471, where taking the minimum or maximum of a
    grouped period column raised a ``TypeError``.

    Parameters
    ----------
    func : str
        Name of the groupby reduction to call (``"min"`` or ``"max"``).
    """
    # GH 31471
    groups = [1, 2]
    periods = pd.period_range("2020", periods=2, freq="Y")
    df = pd.DataFrame({"a": groups, "b": periods})

    result = getattr(df.groupby("a")["b"], func)()

    # Every group holds exactly one row, so both min and max must hand back
    # the original periods, indexed by the group keys.
    # Use the generic ``pd.Index`` constructor rather than ``pd.Int64Index``:
    # it infers the same int64 index and keeps working on pandas versions
    # where ``Int64Index`` is no longer available.
    idx = pd.Index([1, 2], name="a")
    expected = pd.Series(periods, index=idx, name="b")

    tm.assert_series_equal(result, expected)
699+
700+
701+
@pytest.mark.parametrize("func", ["min", "max"])
def test_groupby_aggregate_period_frame(func):
    """Check that DataFrameGroupBy min/max works with a period-dtype column.

    Regression test for GH 31471, where taking the minimum or maximum of a
    grouped period column raised a ``TypeError``.

    Parameters
    ----------
    func : str
        Name of the groupby reduction to call (``"min"`` or ``"max"``).
    """
    # GH 31471
    groups = [1, 2]
    periods = pd.period_range("2020", periods=2, freq="Y")
    df = pd.DataFrame({"a": groups, "b": periods})

    result = getattr(df.groupby("a"), func)()

    # Every group holds exactly one row, so both min and max must hand back
    # the original periods in column "b", indexed by the group keys.
    # Use the generic ``pd.Index`` constructor rather than ``pd.Int64Index``:
    # it infers the same int64 index and keeps working on pandas versions
    # where ``Int64Index`` is no longer available.
    idx = pd.Index([1, 2], name="a")
    expected = pd.DataFrame({"b": periods}, index=idx)

    tm.assert_frame_equal(result, expected)
713+
714+
687715
class TestLambdaMangling:
688716
def test_basic(self):
689717
df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]})

0 commit comments

Comments
 (0)