Skip to content

Commit 1127370

Browse files
committed
PERF: MultiIndex._shallow_copy
1 parent 3b66021 commit 1127370

File tree

6 files changed

+28
-7
lines changed

6 files changed

+28
-7
lines changed

doc/source/whatsnew/v1.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ Performance improvements
190190
- Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`)
191191
- The internal index method :meth:`~Index._shallow_copy` now copies cached attributes over to the new index,
192192
avoiding creating these again on the new index. This can speed up many operations that depend on creating copies of
193-
existing indexes (:issue:`28584`, :issue:`32640`)
193+
existing indexes (:issue:`28584`, :issue:`32640`, :issue:`32669`)
194194

195195
.. ---------------------------------------------------------------------------
196196

pandas/core/indexes/multi.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@ def __new__(
276276
raise ValueError("Must pass non-zero number of levels/codes")
277277

278278
result = object.__new__(MultiIndex)
279+
result._cache = {}
279280

280281
# we've already validated levels and codes, so shortcut here
281282
result._set_levels(levels, copy=copy, validate=False)
@@ -689,7 +690,7 @@ def __len__(self) -> int:
689690
# --------------------------------------------------------------------
690691
# Levels Methods
691692

692-
@cache_readonly
693+
@property
693694
def levels(self):
694695
# NOTE(review): decorator is now @property, not cache_readonly -- confirm this still holds. cache_readonly was used to ensure that self.get_locs doesn't repeatedly
695696
# create new IndexEngine
@@ -991,7 +992,10 @@ def _shallow_copy(self, values=None, **kwargs):
991992
# discards freq
992993
kwargs.pop("freq", None)
993994
return MultiIndex.from_tuples(values, names=names, **kwargs)
994-
return self.copy(**kwargs)
995+
996+
result = self.copy(**kwargs)
997+
result._cache = self._cache.copy()
998+
return result
995999

9961000
def _shallow_copy_with_infer(self, values, **kwargs):
9971001
# On equal MultiIndexes the difference is empty.

pandas/core/indexes/period.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -251,11 +251,13 @@ def _has_complex_internals(self):
251251

252252
def _shallow_copy(self, values=None, name: Label = no_default):
253253
name = name if name is not no_default else self.name
254-
254+
cache = self._cache.copy() if values is None else {}
255255
if values is None:
256256
values = self._data
257257

258-
return self._simple_new(values, name=name)
258+
result = self._simple_new(values, name=name)
259+
result._cache = cache
260+
return result
259261

260262
def _maybe_convert_timedelta(self, other):
261263
"""

pandas/core/indexes/range.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -141,7 +141,7 @@ def _simple_new(cls, values: range, name: Label = None) -> "RangeIndex":
141141

142142
result._range = values
143143
result.name = name
144-
144+
result._cache = {}
145145
result._reset_identity()
146146
return result
147147

@@ -391,7 +391,9 @@ def _shallow_copy(self, values=None, name: Label = no_default):
391391
name = self.name if name is no_default else name
392392

393393
if values is None:
394-
return self._simple_new(self._range, name=name)
394+
result = self._simple_new(self._range, name=name)
395+
result._cache = self._cache.copy()
396+
return result
395397
else:
396398
return Int64Index._simple_new(values, name=name)
397399

pandas/tests/indexes/common.py

+12
Original file line numberDiff line numberDiff line change
@@ -919,3 +919,15 @@ def test_contains_requires_hashable_raises(self):
919919

920920
with pytest.raises(TypeError):
921921
{} in idx._engine
922+
923+
def test_shallow_copy_copies_cache(self):
924+
# GH32669
925+
idx = self.create_index()
926+
idx.get_loc(idx[0]) # initiate the _engine etc.
927+
shallow_copy = idx._shallow_copy()
928+
# check that the shallow_copied cache is a copy of the original
929+
assert idx._cache == shallow_copy._cache
930+
assert idx._cache is not shallow_copy._cache
931+
# the cache values should reference the same objects
932+
for key, val in idx._cache.items():
933+
assert shallow_copy._cache[key] is val

pandas/tests/indexes/multi/test_names.py

+1
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
def check_level_names(index, names):
99
assert [level.name for level in index.levels] == list(names)
10+
assert index.names == list(names)
1011

1112

1213
def test_slice_keep_name():

0 commit comments

Comments
 (0)