From c485ef2b4fb89bdf955a110b781e9ca5d3ad9498 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Fri, 27 Nov 2020 21:12:01 +0100 Subject: [PATCH] Backport PR #38094: REGR: fix regression in groupby aggregation with out-of-bounds datetimes --- doc/source/whatsnew/v1.1.5.rst | 1 + pandas/_libs/reduction.pyx | 4 +++- .../tests/groupby/aggregate/test_aggregate.py | 19 +++++++++++++++++++ 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index 29b0e99a3a356..9be1ff7e836ea 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -20,6 +20,7 @@ Fixed regressions - Fixed regression in inplace operations on :class:`Series` with ``ExtensionDtype`` with NumPy dtyped operand (:issue:`37910`) - Fixed regression in metadata propagation for ``groupby`` iterator (:issue:`37343`) - Fixed regression in indexing on a :class:`Series` with ``CategoricalDtype`` after unpickling (:issue:`37631`) +- Fixed regression in :meth:`DataFrame.groupby` aggregation with out-of-bounds datetime objects in an object-dtype column (:issue:`36003`) - Fixed regression in ``df.groupby(..).rolling(..)`` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`) - Fixed regression in :meth:`DataFrame.fillna` not filling ``NaN`` after other operations such as :meth:`DataFrame.pivot` (:issue:`36495`). diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 7b36bc8baf891..0d6f7f955b217 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -46,7 +46,9 @@ cdef class _BaseGrouper: Slider islider, Slider vslider): if cached_typ is None: cached_ityp = self.ityp(islider.buf) - cached_typ = self.typ(vslider.buf, index=cached_ityp, name=self.name) + cached_typ = self.typ( + vslider.buf, dtype=vslider.buf.dtype, index=cached_ityp, name=self.name + ) else: # See the comment in indexes/base.py about _index_data. # We need this for EA-backed indexes that have a reference diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 127d3fadee555..9d67397d2ccb9 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1,6 +1,7 @@ """ test .agg behavior / note that .apply is tested generally in test_groupby.py """ +import datetime import functools from functools import partial @@ -1089,3 +1090,21 @@ def test_agg_no_suffix_index(): result = df["A"].agg(["sum", lambda x: x.sum(), lambda x: x.sum()]) expected = pd.Series([12, 12, 12], index=["sum", "", ""], name="A") tm.assert_series_equal(result, expected) + + +def test_aggregate_datetime_objects(): + # https://github.com/pandas-dev/pandas/issues/36003 + # ensure we don't raise an error but keep object dtype for out-of-bounds + # datetimes + df = DataFrame( + { + "A": ["X", "Y"], + "B": [ + datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + datetime.datetime(3005, 1, 1, 10, 30, 23, 540000), + ], + } + ) + result = df.groupby("A").B.max() + expected = df.set_index("A")["B"] + tm.assert_series_equal(result, expected)