Skip to content

PERF: MultiIndex._shallow_copy #32669

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits on Mar 14, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ Performance improvements
- Performance improvement in flex arithmetic ops between :class:`DataFrame` and :class:`Series` with ``axis=0`` (:issue:`31296`)
- The internal index method :meth:`~Index._shallow_copy` now copies cached attributes over to the new index,
avoiding creating these again on the new index. This can speed up many operations that depend on creating copies of
existing indexes (:issue:`28584`, :issue:`32640`)
existing indexes (:issue:`28584`, :issue:`32640`, :issue:`32669`)

.. ---------------------------------------------------------------------------

Expand Down
9 changes: 8 additions & 1 deletion pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ def __new__(
raise ValueError("Must pass non-zero number of levels/codes")

result = object.__new__(MultiIndex)
result._cache = {}

# we've already validated levels and codes, so shortcut here
result._set_levels(levels, copy=copy, validate=False)
Expand Down Expand Up @@ -991,7 +992,13 @@ def _shallow_copy(self, values=None, **kwargs):
# discards freq
kwargs.pop("freq", None)
return MultiIndex.from_tuples(values, names=names, **kwargs)
return self.copy(**kwargs)

result = self.copy(**kwargs)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a code comment here pointing back to this thread?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok

result._cache = self._cache.copy()
# GH32669
if "levels" in result._cache:
del result._cache["levels"]
return result

def _shallow_copy_with_infer(self, values, **kwargs):
# On equal MultiIndexes the difference is empty.
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,11 +251,13 @@ def _has_complex_internals(self):

def _shallow_copy(self, values=None, name: Label = no_default):
# Build a new index through ``_simple_new``. ``name`` falls back to
# ``self.name`` and ``values`` falls back to ``self._data`` when omitted.
# NOTE(review): this is a flattened diff rendering (the file header reports
# 4 additions & 2 deletions), so old and new lines appear side by side and
# the original indentation is not shown.
name = name if name is not no_default else self.name

# Reuse the cached attributes only when the underlying data is unchanged
# (``values is None``); an index built from new values starts with an empty
# cache. This must be decided before ``values`` is replaced below.
cache = self._cache.copy() if values is None else {}
if values is None:
values = self._data

# NOTE(review): the next line is presumably the pre-change return that the
# three lines after it replace -- read literally it would make them
# unreachable. Confirm against the merged file.
return self._simple_new(values, name=name)
result = self._simple_new(values, name=name)
result._cache = cache
return result

def _maybe_convert_timedelta(self, other):
"""
Expand Down
6 changes: 4 additions & 2 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ def _simple_new(cls, values: range, name: Label = None) -> "RangeIndex":

result._range = values
result.name = name

result._cache = {}
result._reset_identity()
return result

Expand Down Expand Up @@ -391,7 +391,9 @@ def _shallow_copy(self, values=None, name: Label = no_default):
name = self.name if name is no_default else name

if values is None:
return self._simple_new(self._range, name=name)
result = self._simple_new(self._range, name=name)
result._cache = self._cache.copy()
return result
else:
return Int64Index._simple_new(values, name=name)

Expand Down
13 changes: 13 additions & 0 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -919,3 +919,16 @@ def test_contains_requires_hashable_raises(self):

with pytest.raises(TypeError):
{} in idx._engine

def test_shallow_copy_copies_cache(self):
# GH32669
# Checks that ``_shallow_copy()`` called with no arguments returns an index
# whose ``_cache`` is a separate dict holding the very same entries as the
# original index's ``_cache``.
# NOTE(review): ``create_index`` is not defined in this view -- presumably
# each concrete test class provides it; verify there.
idx = self.create_index()
idx.get_loc(idx[0]) # populates the _cache.
shallow_copy = idx._shallow_copy()

# check that the shallow_copied cache is a copy of the original
# (equal contents, but not the same dict object)
assert idx._cache == shallow_copy._cache
assert idx._cache is not shallow_copy._cache
# cache values should reference the same object
# (identity, not just equality; the key is passed as the assertion message
# so a failure names the offending cache entry)
# NOTE(review): indentation is lost in this rendering -- the assert below is
# presumably the body of the loop.
for key, val in idx._cache.items():
assert shallow_copy._cache[key] is val, key