From 270db1597588e792617c34f38c79174d08f731f5 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Mar 2022 16:26:16 -0700 Subject: [PATCH 1/3] BUG: gb cummax with int64 and NPY_NAT --- pandas/_libs/groupby.pyx | 13 +++++++++---- pandas/tests/groupby/test_function.py | 12 ++++++++++++ 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/groupby.pyx b/pandas/_libs/groupby.pyx index ff5258e37e352..d84cc4873a250 100644 --- a/pandas/_libs/groupby.pyx +++ b/pandas/_libs/groupby.pyx @@ -989,13 +989,18 @@ cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil: return False -cdef numeric_t _get_min_or_max(numeric_t val, bint compute_max): +cdef numeric_t _get_min_or_max(numeric_t val, bint compute_max, bint is_datetimelike): """ Find either the min or the max supported by numeric_t; 'val' is a placeholder to effectively make numeric_t an argument. """ if numeric_t is int64_t: - return -_int64_max if compute_max else util.INT64_MAX + if compute_max and is_datetimelike: + return -_int64_max + # Note(jbrockmendel) 2022-03-15 for reasons unknown, using util.INT64_MIN + # instead of NPY_NAT here causes build warnings and failure in + # test_cummax_i8_at_implementation_bound + return NPY_NAT if compute_max else util.INT64_MAX elif numeric_t is int32_t: return util.INT32_MIN if compute_max else util.INT32_MAX elif numeric_t is int16_t: @@ -1395,7 +1400,7 @@ cdef group_min_max( nobs = np.zeros((out).shape, dtype=np.int64) group_min_or_max = np.empty_like(out) - group_min_or_max[:] = _get_min_or_max(0, compute_max) + group_min_or_max[:] = _get_min_or_max(0, compute_max, is_datetimelike) if iu_64_floating_t is int64_t: # TODO: only if is_datetimelike? @@ -1564,7 +1569,7 @@ cdef group_cummin_max( bint isna_entry accum = np.empty((ngroups, (values).shape[1]), dtype=values.dtype) - accum[:] = _get_min_or_max(0, compute_max) + accum[:] = _get_min_or_max(0, compute_max, is_datetimelike) na_val = _get_na_val(0, is_datetimelike) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index ec7f3200b682b..d7a187053f52c 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -830,6 +830,18 @@ def test_cummax(dtypes_for_minmax): tm.assert_series_equal(result, expected) +def test_cummax_i8_at_implementation_bound(): + # the minimum value used to be treated as NPY_NAT+1 instead of NPY_NAT + # for int64 dtype. + ser = Series([pd.NaT.value + n for n in range(5)]) + df = DataFrame({"A": 1, "B": ser, "C": ser.view("M8[ns]")}) + gb = df.groupby("A") + + res = gb.cummax() + exp = df[["B", "C"]] + tm.assert_frame_equal(res, exp) + + @pytest.mark.parametrize("method", ["cummin", "cummax"]) @pytest.mark.parametrize("dtype", ["float", "Int64", "Float64"]) @pytest.mark.parametrize( From 3a75afa7294fa7ff3e140952ff6d29a0497fcf35 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Mar 2022 16:34:48 -0700 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 8dac952874f89..a3725759fda6a 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -454,6 +454,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupby.cumsum` with ``skipna=False`` giving incorrect results (:issue:`46216`) - Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`) - Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`) +- Bug in :meth:`GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`??`) - Reshaping From c1cbc72f5387bb0bd640eed794ebce4aa7302ee9 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Mar 2022 16:36:31 -0700 Subject: [PATCH 3/3] GH ref --- doc/source/whatsnew/v1.5.0.rst | 2 +- pandas/tests/groupby/test_function.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index a3725759fda6a..b18d5aaac6539 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -454,7 +454,7 @@ Groupby/resample/rolling - Bug in :meth:`DataFrameGroupby.cumsum` with ``skipna=False`` giving incorrect results (:issue:`46216`) - Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`) - Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`) -- Bug in :meth:`GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`??`) +- Bug in :meth:`GroupBy.cummax` with ``int64`` dtype with leading value being the smallest possible int64 (:issue:`46382`) - Reshaping diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index d7a187053f52c..7440b63e78b65 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -832,7 +832,7 @@ def test_cummax(dtypes_for_minmax): def test_cummax_i8_at_implementation_bound(): # the minimum value used to be treated as NPY_NAT+1 instead of NPY_NAT - # for int64 dtype. + # for int64 dtype GH#46382 ser = Series([pd.NaT.value + n for n in range(5)]) df = DataFrame({"A": 1, "B": ser, "C": ser.view("M8[ns]")}) gb = df.groupby("A")