Skip to content

Commit 7516d02

Browse files
jbrockmendel authored and yehoshuadimarsky committed
Bug gb cummax (pandas-dev#46382)
1 parent d0b0130 commit 7516d02

File tree

3 files changed

+22
-4
lines changed

3 files changed

+22
-4
lines changed

doc/source/whatsnew/v1.5.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -455,6 +455,7 @@ Groupby/resample/rolling
455455
- Bug in :meth:`DataFrameGroupBy.cumsum` with ``skipna=False`` giving incorrect results (:issue:`46216`)
456456
- Bug in :meth:`.GroupBy.cumsum` with ``timedelta64[ns]`` dtype failing to recognize ``NaT`` as a null value (:issue:`46216`)
457457
- Bug in :meth:`GroupBy.cummin` and :meth:`GroupBy.cummax` with nullable dtypes incorrectly altering the original data in place (:issue:`46220`)
458+
- Bug in :meth:`GroupBy.cummax` with ``int64`` dtype giving incorrect results when the leading value is the smallest possible int64 (:issue:`46382`)
458459
-
459460

460461
Reshaping

pandas/_libs/groupby.pyx

+9-4
Original file line numberDiff line numberDiff line change
@@ -989,13 +989,18 @@ cdef inline bint _treat_as_na(numeric_object_t val, bint is_datetimelike) nogil:
989989
return False
990990

991991

992-
cdef numeric_t _get_min_or_max(numeric_t val, bint compute_max):
992+
cdef numeric_t _get_min_or_max(numeric_t val, bint compute_max, bint is_datetimelike):
993993
"""
994994
Find either the min or the max supported by numeric_t; 'val' is a placeholder
995995
to effectively make numeric_t an argument.
996996
"""
997997
if numeric_t is int64_t:
998-
return -_int64_max if compute_max else util.INT64_MAX
998+
if compute_max and is_datetimelike:
999+
return -_int64_max
1000+
# Note(jbrockmendel) 2022-03-15 for reasons unknown, using util.INT64_MIN
1001+
# instead of NPY_NAT here causes build warnings and failure in
1002+
# test_cummax_i8_at_implementation_bound
1003+
return NPY_NAT if compute_max else util.INT64_MAX
9991004
elif numeric_t is int32_t:
10001005
return util.INT32_MIN if compute_max else util.INT32_MAX
10011006
elif numeric_t is int16_t:
@@ -1395,7 +1400,7 @@ cdef group_min_max(
13951400
nobs = np.zeros((<object>out).shape, dtype=np.int64)
13961401

13971402
group_min_or_max = np.empty_like(out)
1398-
group_min_or_max[:] = _get_min_or_max(<iu_64_floating_t>0, compute_max)
1403+
group_min_or_max[:] = _get_min_or_max(<iu_64_floating_t>0, compute_max, is_datetimelike)
13991404

14001405
if iu_64_floating_t is int64_t:
14011406
# TODO: only if is_datetimelike?
@@ -1564,7 +1569,7 @@ cdef group_cummin_max(
15641569
bint isna_entry
15651570

15661571
accum = np.empty((ngroups, (<object>values).shape[1]), dtype=values.dtype)
1567-
accum[:] = _get_min_or_max(<iu_64_floating_t>0, compute_max)
1572+
accum[:] = _get_min_or_max(<iu_64_floating_t>0, compute_max, is_datetimelike)
15681573

15691574
na_val = _get_na_val(<iu_64_floating_t>0, is_datetimelike)
15701575

pandas/tests/groupby/test_function.py

+12
Original file line numberDiff line numberDiff line change
@@ -830,6 +830,18 @@ def test_cummax(dtypes_for_minmax):
830830
tm.assert_series_equal(result, expected)
831831

832832

833+
def test_cummax_i8_at_implementation_bound():
834+
# the minimum value used to be treated as NPY_NAT+1 instead of NPY_NAT
835+
# for int64 dtype GH#46382
836+
ser = Series([pd.NaT.value + n for n in range(5)])
837+
df = DataFrame({"A": 1, "B": ser, "C": ser.view("M8[ns]")})
838+
gb = df.groupby("A")
839+
840+
res = gb.cummax()
841+
exp = df[["B", "C"]]
842+
tm.assert_frame_equal(res, exp)
843+
844+
833845
@pytest.mark.parametrize("method", ["cummin", "cummax"])
834846
@pytest.mark.parametrize("dtype", ["float", "Int64", "Float64"])
835847
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)