diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 5b6f70be478c2..48eff0543ad4d 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -190,7 +190,7 @@ Performance improvements - Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`) - The internal index method :meth:`~Index._shallow_copy` now copies cached attributes over to the new index, avoiding creating these again on the new index. This can speed up many operations that depend on creating copies of - existing indexes (:issue:`28584`, :issue:`32640`) + existing indexes (:issue:`28584`, :issue:`32640`, :issue:`32669`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 0bb88145646ed..122097f4478d7 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -276,6 +276,7 @@ def __new__( raise ValueError("Must pass non-zero number of levels/codes") result = object.__new__(MultiIndex) + result._cache = {} # we've already validated levels and codes, so shortcut here result._set_levels(levels, copy=copy, validate=False) @@ -991,7 +992,13 @@ def _shallow_copy(self, values=None, **kwargs): # discards freq kwargs.pop("freq", None) return MultiIndex.from_tuples(values, names=names, **kwargs) - return self.copy(**kwargs) + + result = self.copy(**kwargs) + result._cache = self._cache.copy() + # GH32669 + if "levels" in result._cache: + del result._cache["levels"] + return result def _shallow_copy_with_infer(self, values, **kwargs): # On equal MultiIndexes the difference is empty. diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 8ae792d3f63b5..f83234f1aac0b 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -251,11 +251,13 @@ def _has_complex_internals(self): def _shallow_copy(self, values=None, name: Label = no_default): name = name if name is not no_default else self.name - + cache = self._cache.copy() if values is None else {} if values is None: values = self._data - return self._simple_new(values, name=name) + result = self._simple_new(values, name=name) + result._cache = cache + return result def _maybe_convert_timedelta(self, other): """ diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index c21d8df2476b3..2c038564f4e6f 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -141,7 +141,7 @@ def _simple_new(cls, values: range, name: Label = None) -> "RangeIndex": result._range = values result.name = name - + result._cache = {} result._reset_identity() return result @@ -391,7 +391,9 @@ def _shallow_copy(self, values=None, name: Label = no_default): name = self.name if name is no_default else name if values is None: - return self._simple_new(self._range, name=name) + result = self._simple_new(self._range, name=name) + result._cache = self._cache.copy() + return result else: return Int64Index._simple_new(values, name=name) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index e5af0d9c03979..c13385c135e9f 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -919,3 +919,16 @@ def test_contains_requires_hashable_raises(self): with pytest.raises(TypeError): {} in idx._engine + + def test_shallow_copy_copies_cache(self): + # GH32669 + idx = self.create_index() + idx.get_loc(idx[0]) # populates the _cache. + shallow_copy = idx._shallow_copy() + + # check that the shallow_copied cache is a copy of the original + assert idx._cache == shallow_copy._cache + assert idx._cache is not shallow_copy._cache + # cache values should reference the same object + for key, val in idx._cache.items(): + assert shallow_copy._cache[key] is val, key