From 468517ab3c1edf8659276f6776a33409a7e85e5f Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 26 Nov 2020 17:45:00 +0100 Subject: [PATCH 1/3] REGR: fix regression in groupby aggregation with out-of-bounds datetimes --- doc/source/whatsnew/v1.1.5.rst | 1 + pandas/core/groupby/ops.py | 4 ++++ .../tests/groupby/aggregate/test_aggregate.py | 19 +++++++++++++++++++ 3 files changed, 24 insertions(+) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index a8bbf692a72e5..7b7f77a582463 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -20,6 +20,7 @@ Fixed regressions - Fixed regression in inplace operations on :class:`Series` with ``ExtensionDtype`` with NumPy dtyped operand (:issue:`37910`) - Fixed regression in metadata propagation for ``groupby`` iterator (:issue:`37343`) - Fixed regression in indexing on a :class:`Series` with ``CategoricalDtype`` after unpickling (:issue:`37631`) +- Fixed regression in :meth:`DataFrame.groupby` aggregation with out-of-bounds datetime objects in an object-dtype column (:issues:`36003`) - Fixed regression in ``df.groupby(..).rolling(..)`` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index fc80852f00c95..c7038a10afe87 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -639,6 +639,10 @@ def agg_series(self, obj: Series, func: F): try: return self._aggregate_series_fast(obj, func) + except AssertionError: + # in some cases (eg GH-36003) an internal AssertionError can be + # raised if libreduction cannot handle this case + pass except ValueError as err: if "Must produce aggregated value" in str(err): # raised in libreduction diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index dba039b66d22d..820d51325b424 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1,6 +1,7 @@ """ test .agg behavior / note that .apply is tested generally in test_groupby.py """ +import datetime import functools from functools import partial @@ -1156,3 +1157,21 @@ def test_agg_no_suffix_index(): result = df["A"].agg(["sum", lambda x: x.sum(), lambda x: x.sum()]) expected = Series([12, 12, 12], index=["sum", "", ""], name="A") tm.assert_series_equal(result, expected) + + +def test_aggregate_datetime_objects(): + # https://github.com/pandas-dev/pandas/issues/36003 + # ensure we don't raise an error but keep object dtype for out-of-bounds + # datetimes + df = DataFrame( + { + "A": ["X", "Y"], + "B": [ + datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + datetime.datetime(3005, 1, 1, 10, 30, 23, 540000), + ], + } + ) + result = df.groupby("A").B.max() + expected = df.set_index("A")["B"] + tm.assert_series_equal(result, expected) From ff56f15db79bef12e732e06bcbd77da97cf6448d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 26 Nov 2020 19:45:14 +0100 Subject: [PATCH 2/3] fix rst --- doc/source/whatsnew/v1.1.5.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst index 7b7f77a582463..b34df1726dd17 100644 --- a/doc/source/whatsnew/v1.1.5.rst +++ b/doc/source/whatsnew/v1.1.5.rst @@ -20,7 +20,7 @@ Fixed regressions - Fixed regression in inplace operations on :class:`Series` with ``ExtensionDtype`` with NumPy dtyped operand (:issue:`37910`) - Fixed regression in metadata propagation for ``groupby`` iterator (:issue:`37343`) - Fixed regression in indexing on a :class:`Series` with ``CategoricalDtype`` after unpickling (:issue:`37631`) -- Fixed regression in :meth:`DataFrame.groupby` aggregation with out-of-bounds datetime objects in an object-dtype column (:issues:`36003`) +- Fixed regression in :meth:`DataFrame.groupby` aggregation with out-of-bounds datetime objects in an object-dtype column (:issue:`36003`) - Fixed regression in ``df.groupby(..).rolling(..)`` with the resulting :class:`MultiIndex` when grouping by a label that is in the index (:issue:`37641`) .. --------------------------------------------------------------------------- From 3c5f9247be8dae0f9ce2269e8cbbf5c02e09323d Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Thu, 26 Nov 2020 22:58:03 +0100 Subject: [PATCH 3/3] specify dtype --- pandas/_libs/reduction.pyx | 4 +++- pandas/core/groupby/ops.py | 4 ---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/reduction.pyx b/pandas/_libs/reduction.pyx index 9459cd297c758..ad6329c588bbe 100644 --- a/pandas/_libs/reduction.pyx +++ b/pandas/_libs/reduction.pyx @@ -44,7 +44,9 @@ cdef class _BaseGrouper: Slider islider, Slider vslider): if cached_typ is None: cached_ityp = self.ityp(islider.buf) - cached_typ = self.typ(vslider.buf, index=cached_ityp, name=self.name) + cached_typ = self.typ( + vslider.buf, dtype=vslider.buf.dtype, index=cached_ityp, name=self.name + ) else: # See the comment in indexes/base.py about _index_data. # We need this for EA-backed indexes that have a reference diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index c7038a10afe87..fc80852f00c95 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -639,10 +639,6 @@ def agg_series(self, obj: Series, func: F): try: return self._aggregate_series_fast(obj, func) - except AssertionError: - # in some cases (eg GH-36003) an internal AssertionError can be - # raised if libreduction cannot handle this case - pass except ValueError as err: if "Must produce aggregated value" in str(err): # raised in libreduction