Skip to content

Commit 4ce9c0c

Browse files
mroeschke authored and jreback committed
BUG: Incorrect value updating for groupby.cummin/max (#15635)
Closes #15635.
Author: Matt Roeschke <[email protected]>
Closes #15642 from mroeschke/fix_15635 and squashes the following commits:
b92b81a [Matt Roeschke] BUG: Incorrect value updating for groupby.cummin/max (#15635)
1 parent 2203808 commit 4ce9c0c

File tree

3 files changed: 22 additions and 11 deletions.

doc/source/whatsnew/v0.20.0.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -716,7 +716,7 @@ Performance Improvements
716716
- Increased performance of ``pd.factorize()`` by releasing the GIL with ``object`` dtype when inferred as strings (:issue:`14859`)
717717
- Improved performance of timeseries plotting with an irregular DatetimeIndex
718718
(or with ``compat_x=True``) (:issue:`15073`).
719-
- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`)
719+
- Improved performance of ``groupby().cummin()`` and ``groupby().cummax()`` (:issue:`15048`, :issue:`15109`, :issue:`15561`, :issue:`15635`)
720720
- Improved performance and reduced memory when indexing with a ``MultiIndex`` (:issue:`15245`)
721721
- When reading buffer object in ``read_sas()`` method without specified format, filepath string is inferred rather than buffer object. (:issue:`14947`)
722722
- Improved performance of ``.rank()`` for categorical data (:issue:`15498`)

pandas/_libs/algos_groupby_helper.pxi.in

+10-10
Original file line number | Diff line number | Diff line change
@@ -603,7 +603,7 @@ def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
603603
"""
604604
cdef:
605605
Py_ssize_t i, j, N, K, size
606-
{{dest_type2}} val, min_val = 0
606+
{{dest_type2}} val, mval
607607
ndarray[{{dest_type2}}, ndim=2] accum
608608
int64_t lab
609609

@@ -628,10 +628,10 @@ def group_cummin_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
628628
{{else}}
629629
if val == val:
630630
{{endif}}
631-
if val < accum[lab, j]:
632-
min_val = val
633-
accum[lab, j] = min_val
634-
out[i, j] = accum[lab, j]
631+
mval = accum[lab, j]
632+
if val < mval:
633+
accum[lab, j] = mval = val
634+
out[i, j] = mval
635635

636636

637637
@cython.boundscheck(False)
@@ -645,7 +645,7 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
645645
"""
646646
cdef:
647647
Py_ssize_t i, j, N, K, size
648-
{{dest_type2}} val, max_val = 0
648+
{{dest_type2}} val, mval
649649
ndarray[{{dest_type2}}, ndim=2] accum
650650
int64_t lab
651651

@@ -669,10 +669,10 @@ def group_cummax_{{name}}(ndarray[{{dest_type2}}, ndim=2] out,
669669
{{else}}
670670
if val == val:
671671
{{endif}}
672-
if val > accum[lab, j]:
673-
max_val = val
674-
accum[lab, j] = max_val
675-
out[i, j] = accum[lab, j]
672+
mval = accum[lab, j]
673+
if val > mval:
674+
accum[lab, j] = mval = val
675+
out[i, j] = mval
676676

677677
{{endfor}}
678678

pandas/tests/groupby/test_groupby.py

+11
Original file line number | Diff line number | Diff line change
@@ -4303,6 +4303,17 @@ def test_cummin_cummax(self):
43034303
result = getattr(df.groupby('a')['b'], method)()
43044304
tm.assert_series_equal(expected, result)
43054305

4306+
# GH 15635
4307+
df = pd.DataFrame(dict(a=[1, 2, 1], b=[2, 1, 1]))
4308+
result = df.groupby('a').b.cummax()
4309+
expected = pd.Series([2, 1, 2], name='b')
4310+
tm.assert_series_equal(result, expected)
4311+
4312+
df = pd.DataFrame(dict(a=[1, 2, 1], b=[1, 2, 2]))
4313+
result = df.groupby('a').b.cummin()
4314+
expected = pd.Series([1, 2, 1], name='b')
4315+
tm.assert_series_equal(result, expected)
4316+
43064317

43074318
def _check_groupby(df, result, keys, field, f=lambda x: x.sum()):
43084319
tups = lmap(tuple, df[keys].values)

0 commit comments

Comments
 (0)