Skip to content

Commit 04a0eac

Browse files
jbrockmendel authored and jreback committed
implement _index_data parts of #24024 (#24379)
1 parent 5d134ec commit 04a0eac

File tree

5 files changed

+22
-3
lines changed

5 files changed

+22
-3
lines changed

pandas/_libs/reduction.pyx

+10-3
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,10 @@ cdef class SeriesBinGrouper:
265265
cached_typ = self.typ(vslider.buf, index=cached_ityp,
266266
name=name)
267267
else:
268-
object.__setattr__(cached_ityp, '_data', islider.buf)
268+
# See the comment in indexes/base.py about _index_data.
269+
# We need this for EA-backed indexes that have a reference
270+
# to a 1-d ndarray like datetime / timedelta / period.
271+
object.__setattr__(cached_ityp, '_index_data', islider.buf)
269272
cached_ityp._engine.clear_mapping()
270273
object.__setattr__(
271274
cached_typ._data._block, 'values', vslider.buf)
@@ -569,8 +572,11 @@ cdef class BlockSlider:
569572
util.set_array_not_contiguous(x)
570573

571574
self.nblocks = len(self.blocks)
575+
# See the comment in indexes/base.py about _index_data.
576+
# We need this for EA-backed indexes that have a reference to a 1-d
577+
# ndarray like datetime / timedelta / period.
572578
self.idx_slider = Slider(
573-
self.frame.index.values, self.dummy.index.values)
579+
self.frame.index._index_data, self.dummy.index._index_data)
574580

575581
self.base_ptrs = <char**>malloc(sizeof(char*) * len(self.blocks))
576582
for i, block in enumerate(self.blocks):
@@ -594,7 +600,8 @@ cdef class BlockSlider:
594600

595601
# move and set the index
596602
self.idx_slider.move(start, end)
597-
object.__setattr__(self.index, '_data', self.idx_slider.buf)
603+
604+
object.__setattr__(self.index, '_index_data', self.idx_slider.buf)
598605
self.index._engine.clear_mapping()
599606

600607
cdef reset(self):

pandas/core/indexes/base.py

+6
Original file line numberDiff line numberDiff line change
@@ -519,6 +519,12 @@ def _simple_new(cls, values, name=None, dtype=None, **kwargs):
519519

520520
result = object.__new__(cls)
521521
result._data = values
522+
# _index_data is a (temporary?) fix to ensure that the direct data
523+
# manipulation we do in `_libs/reduction.pyx` continues to work.
524+
# We need access to the actual ndarray, since we're messing with
525+
# data buffers and strides. We don't re-use `_ndarray_values`, since
526+
# we actually set this value too.
527+
result._index_data = values
522528
result.name = name
523529
for k, v in compat.iteritems(kwargs):
524530
setattr(result, k, v)

pandas/core/indexes/datetimes.py

+2
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,8 @@ def _simple_new(cls, values, name=None, freq=None, tz=None, dtype=None):
269269

270270
result = super(DatetimeIndex, cls)._simple_new(values, freq, tz)
271271
result.name = name
272+
# For groupby perf. See note in indexes/base about _index_data
273+
result._index_data = result._data
272274
result._reset_identity()
273275
return result
274276

pandas/core/indexes/period.py

+2
Original file line numberDiff line numberDiff line change
@@ -235,6 +235,8 @@ def _simple_new(cls, values, name=None, freq=None, **kwargs):
235235
raise TypeError("PeriodIndex._simple_new only accepts PeriodArray")
236236
result = object.__new__(cls)
237237
result._data = values
238+
# For groupby perf. See note in indexes/base about _index_data
239+
result._index_data = values._data
238240
result.name = name
239241
result._reset_identity()
240242
return result

pandas/core/indexes/timedeltas.py

+2
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,8 @@ def _simple_new(cls, values, name=None, freq=None, dtype=_TD_DTYPE):
199199

200200
result = super(TimedeltaIndex, cls)._simple_new(values, freq)
201201
result.name = name
202+
# For groupby perf. See note in indexes/base about _index_data
203+
result._index_data = result._data
202204
result._reset_identity()
203205
return result
204206

0 commit comments

Comments
 (0)