Skip to content

Commit 0c16e6c

Browse files
Fix index caching.
1 parent 4bf3f20 commit 0c16e6c

File tree

2 files changed

+18
-2
lines changed

2 files changed

+18
-2
lines changed

pandas/_libs/reduction.pyx

+3-2
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ cdef class _BaseGrouper:
5555

5656
cdef inline _update_cached_objs(self, object cached_typ, object cached_ityp,
5757
Slider islider, Slider vslider):
58-
if cached_typ is None or len(vslider.buf) != len(cached_ityp):
58+
if cached_typ is None:
5959
cached_ityp = self.ityp(islider.buf)
6060
cached_typ = self.typ(
6161
vslider.buf, dtype=vslider.buf.dtype, index=cached_ityp, name=self.name
@@ -65,12 +65,12 @@ cdef class _BaseGrouper:
6565
# We need this for EA-backed indexes that have a reference
6666
# to a 1-d ndarray like datetime / timedelta / period.
6767
object.__setattr__(cached_ityp, '_index_data', islider.buf)
68+
object.__setattr__(cached_ityp, '_data', islider.buf)
6869
cached_ityp._engine.clear_mapping()
6970
cached_ityp._cache.clear() # e.g. inferred_freq must go
7071
cached_typ._mgr.set_values(vslider.buf)
7172
object.__setattr__(cached_typ, '_index', cached_ityp)
7273
object.__setattr__(cached_typ, 'name', self.name)
73-
7474
return cached_typ, cached_ityp
7575

7676
cdef inline object _apply_to_group(self,
@@ -259,6 +259,7 @@ cdef class SeriesGrouper(_BaseGrouper):
259259
start += group_size
260260
group_size = 0
261261
continue
262+
print(f"Group size: {group_size}")
262263

263264
end = start + group_size
264265
islider.move(start, end)

pandas/tests/groupby/test_bin_groupby.py

+15
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,21 @@ def test_series_grouper():
2727
tm.assert_almost_equal(counts, exp_counts)
2828

2929

30+
def test_series_grouper_result_length_difference():
# Exercises SeriesGrouper.get_result() with two groups of different sizes
# (3 rows and 4 rows), preceded by three rows labelled -1.
31+
obj = Series(np.random.randn(10), dtype="float64")
32+
# Cast the index to object dtype before grouping.
obj.index = obj.index.astype("O")
33+
# Rows 0-2 carry label -1; the expectations below only cover rows 3-5
# (label 0) and rows 6-9 (label 1).
labels = np.array([-1, -1, -1, 0, 0, 0, 1, 1, 1, 1], dtype=np.intp)
34+
35+
# NOTE(review): the trailing 2 matches the number of non-negative labels,
# presumably the group count; confirm against SeriesGrouper's signature.
grouper = libreduction.SeriesGrouper(obj, lambda x: all(x > 0), labels, 2)
36+
result, counts = grouper.get_result()
37+
38+
# Recompute the same reduction by hand on the slices for labels 0 and 1.
expected = np.array([all(obj[3:6] > 0), all(obj[6:] > 0)])
39+
tm.assert_almost_equal(result, expected)
40+
41+
# One count per group: three rows labelled 0, four rows labelled 1.
exp_counts = np.array([3, 4], dtype=np.int64)
42+
tm.assert_almost_equal(counts, exp_counts)
43+
44+
3045
def test_series_grouper_requires_nonempty_raises():
3146
# GH#29500
3247
obj = Series(np.random.randn(10))

0 commit comments

Comments
 (0)