Skip to content

Commit 4dae4ab

Browse files
Backport PR #32561: Ensure valid Block mutation in SeriesBinGrouper. (#32635)
Co-authored-by: Tom Augspurger <[email protected]>
1 parent f770958 commit 4dae4ab

File tree

3 files changed

+28
-0
lines changed

3 files changed

+28
-0
lines changed

doc/source/whatsnew/v1.0.2.rst

+1
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ Fixed regressions
2020
- Fixed regression in ``groupby(..).rolling(..).apply()`` (``RollingGroupby``) where the ``raw`` parameter was ignored (:issue:`31754`)
2121
- Fixed regression in :meth:`rolling(..).corr() <pandas.core.window.rolling.Rolling.corr>` when using a time offset (:issue:`31789`)
2222
- Fixed regression in :meth:`groupby(..).nunique() <pandas.core.groupby.DataFrameGroupBy.nunique>` which was modifying the original values if ``NaN`` values were present (:issue:`31950`)
23+
- Fixed regression in ``DataFrame.groupby`` raising a ``ValueError`` from an internal operation (:issue:`31802`)
2324
- Fixed regression where :func:`read_pickle` raised a ``UnicodeDecodeError`` when reading a py27 pickle with :class:`MultiIndex` column (:issue:`31988`).
2425
- Fixed regression in :class:`DataFrame` arithmetic operations with mis-matched columns (:issue:`31623`)
2526
- Fixed regression in :meth:`groupby(..).agg() <pandas.core.groupby.GroupBy.agg>` calling a user-provided function an extra time on an empty input (:issue:`31760`)

pandas/_libs/reduction.pyx

+2
Original file line number | Diff line number | Diff line change
@@ -177,6 +177,8 @@ cdef class _BaseGrouper:
177177
object.__setattr__(cached_ityp, '_index_data', islider.buf)
178178
cached_ityp._engine.clear_mapping()
179179
object.__setattr__(cached_typ._data._block, 'values', vslider.buf)
180+
object.__setattr__(cached_typ._data._block, 'mgr_locs',
181+
slice(len(vslider.buf)))
180182
object.__setattr__(cached_typ, '_index', cached_ityp)
181183
object.__setattr__(cached_typ, 'name', self.name)
182184

pandas/tests/groupby/test_bin_groupby.py

+25
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55

66
from pandas.core.dtypes.common import ensure_int64
77

8+
import pandas as pd
89
from pandas import Index, Series, isna
910
import pandas._testing as tm
1011

@@ -51,6 +52,30 @@ def test_series_bin_grouper():
5152
tm.assert_almost_equal(counts, exp_counts)
5253

5354

55+
def assert_block_lengths(x):
    """Assert that ``x`` and its first block's ``mgr_locs`` have equal length.

    Returns the constant ``0`` so the function can be passed as an
    aggregation callable that yields a predictable scalar per group.

    Raises
    ------
    AssertionError
        If ``len(x)`` differs from ``len(x._data.blocks[0].mgr_locs)``.
    """
    # Only the first block is inspected.
    assert len(x) == len(x._data.blocks[0].mgr_locs)
    return 0
58+
59+
60+
def cumsum_max(x):
    """Evaluate ``x.cumsum().max()`` and return the constant ``0``.

    The computed maximum is intentionally discarded: the call exists only
    to exercise the chained operation on ``x``. Returning ``0`` gives
    callers a predictable scalar result.
    """
    # Result deliberately unused; only the evaluation itself matters here.
    x.cumsum().max()
    return 0
63+
64+
65+
@pytest.mark.parametrize("func", [cumsum_max, assert_block_lengths])
def test_mgr_locs_updated(func):
    # https://github.com/pandas-dev/pandas/issues/31802
    # Some operations may require creating new blocks, which requires
    # valid mgr_locs
    df = pd.DataFrame({"A": ["a", "a", "a"], "B": ["a", "b", "b"], "C": [1, 1, 1]})
    result = df.groupby(["A", "B"]).agg(func)
    # Both parametrized callables return 0, so each of the two groups
    # ("a", "a") and ("a", "b") is expected to aggregate column "C" to 0.
    expected = pd.DataFrame(
        {"C": [0, 0]},
        index=pd.MultiIndex.from_product([["a"], ["a", "b"]], names=["A", "B"]),
    )
    tm.assert_frame_equal(result, expected)
77+
78+
5479
@pytest.mark.parametrize(
5580
"binner,closed,expected",
5681
[

0 commit comments

Comments
 (0)