Skip to content

Commit b58ee57

Browse files
topper-123 and jreback authored
PERF: Index._shallow_copy shares _cache with copies of self (#36840)
* PERF: Index.equals when comparing to copies of self
* refactor _shallow_copy, add GH number
* PERF: share _cache, don't share _id
* rename tests
* fix memory usage test

Co-authored-by: Jeff Reback <[email protected]>
1 parent 5756039 commit b58ee57

File tree

8 files changed

+40
-45
lines changed

8 files changed

+40
-45
lines changed

doc/source/whatsnew/v1.2.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,8 @@ Performance improvements
285285
- ``Styler`` uuid method altered to compress data transmission over web whilst maintaining reasonably low table collision probability (:issue:`36345`)
286286
- Performance improvement in :meth:`pd.to_datetime` with non-ns time unit for ``float`` ``dtype`` columns (:issue:`20445`)
287287
- Performance improvement in setting values on a :class:`IntervalArray` (:issue:`36310`)
288+
- The internal index method :meth:`~Index._shallow_copy` now makes the new index and original index share cached attributes,
289+
avoiding creating these again, if created on either. This can speed up operations that depend on creating copies of existing indexes (:issue:`36840`)
288290
- Performance improvement in :meth:`RollingGroupby.count` (:issue:`35625`)
289291

290292
.. ---------------------------------------------------------------------------

pandas/core/indexes/base.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -561,12 +561,12 @@ def _shallow_copy(self, values=None, name: Label = no_default):
561561
name : Label, defaults to self.name
562562
"""
563563
name = self.name if name is no_default else name
564-
cache = self._cache.copy() if values is None else {}
565-
if values is None:
566-
values = self._values
567564

568-
result = self._simple_new(values, name=name)
569-
result._cache = cache
565+
if values is not None:
566+
return self._simple_new(values, name=name)
567+
568+
result = self._simple_new(self._values, name=name)
569+
result._cache = self._cache
570570
return result
571571

572572
def is_(self, other) -> bool:

pandas/core/indexes/datetimelike.py

+6-8
Original file line numberDiff line numberDiff line change
@@ -671,17 +671,15 @@ def _with_freq(self, freq):
671671

672672
def _shallow_copy(self, values=None, name: Label = lib.no_default):
673673
name = self.name if name is lib.no_default else name
674-
cache = self._cache.copy() if values is None else {}
675674

676-
if values is None:
677-
values = self._data
678-
679-
if isinstance(values, np.ndarray):
675+
if values is not None:
680676
# TODO: We would rather not get here
681-
values = type(self._data)(values, dtype=self.dtype)
677+
if isinstance(values, np.ndarray):
678+
values = type(self._data)(values, dtype=self.dtype)
679+
return self._simple_new(values, name=name)
682680

683-
result = type(self)._simple_new(values, name=name)
684-
result._cache = cache
681+
result = self._simple_new(self._data, name=name)
682+
result._cache = self._cache
685683
return result
686684

687685
# --------------------------------------------------------------------

pandas/core/indexes/interval.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -335,12 +335,12 @@ def _shallow_copy(
335335
self, values: Optional[IntervalArray] = None, name: Label = lib.no_default
336336
):
337337
name = self.name if name is lib.no_default else name
338-
cache = self._cache.copy() if values is None else {}
339-
if values is None:
340-
values = self._data
341338

342-
result = self._simple_new(values, name=name)
343-
result._cache = cache
339+
if values is not None:
340+
return self._simple_new(values, name=name)
341+
342+
result = self._simple_new(self._data, name=name)
343+
result._cache = self._cache
344344
return result
345345

346346
@cache_readonly

pandas/core/indexes/period.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -260,12 +260,12 @@ def _has_complex_internals(self) -> bool:
260260

261261
def _shallow_copy(self, values=None, name: Label = no_default):
262262
name = name if name is not no_default else self.name
263-
cache = self._cache.copy() if values is None else {}
264-
if values is None:
265-
values = self._data
266263

267-
result = self._simple_new(values, name=name)
268-
result._cache = cache
264+
if values is not None:
265+
return self._simple_new(values, name=name)
266+
267+
result = self._simple_new(self._data, name=name)
268+
result._cache = self._cache
269269
return result
270270

271271
def _maybe_convert_timedelta(self, other):

pandas/core/indexes/range.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -397,13 +397,13 @@ def __iter__(self):
397397
def _shallow_copy(self, values=None, name: Label = no_default):
398398
name = self.name if name is no_default else name
399399

400-
if values is None:
401-
result = self._simple_new(self._range, name=name)
402-
result._cache = self._cache.copy()
403-
return result
404-
else:
400+
if values is not None:
405401
return Int64Index._simple_new(values, name=name)
406402

403+
result = self._simple_new(self._range, name=name)
404+
result._cache = self._cache
405+
return result
406+
407407
@doc(Int64Index.copy)
408408
def copy(self, name=None, deep=False, dtype=None, names=None):
409409
name = self._validate_names(name=name, names=names, deep=deep)[0]

pandas/tests/base/test_misc.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,8 @@ def test_memory_usage(index_or_series_obj):
128128
)
129129

130130
if len(obj) == 0:
131-
assert res_deep == res == 0
131+
expected = 0 if isinstance(obj, Index) else 80
132+
assert res_deep == res == expected
132133
elif is_object or is_categorical:
133134
# only deep will pick them up
134135
assert res_deep > res

pandas/tests/indexes/common.py

+10-16
Original file line numberDiff line numberDiff line change
@@ -935,28 +935,22 @@ def test_contains_requires_hashable_raises(self):
935935
with pytest.raises(TypeError, match=msg):
936936
{} in idx._engine
937937

938-
def test_copy_copies_cache(self):
939-
# GH32898
938+
def test_copy_shares_cache(self):
939+
# GH32898, GH36840
940940
idx = self.create_index()
941941
idx.get_loc(idx[0]) # populates the _cache.
942942
copy = idx.copy()
943943

944-
# check that the copied cache is a copy of the original
945-
assert idx._cache == copy._cache
946-
assert idx._cache is not copy._cache
947-
# cache values should reference the same object
948-
for key, val in idx._cache.items():
949-
assert copy._cache[key] is val, key
944+
assert copy._cache is idx._cache
950945

951-
def test_shallow_copy_copies_cache(self):
952-
# GH32669
946+
def test_shallow_copy_shares_cache(self):
947+
# GH32669, GH36840
953948
idx = self.create_index()
954949
idx.get_loc(idx[0]) # populates the _cache.
955950
shallow_copy = idx._shallow_copy()
956951

957-
# check that the shallow_copied cache is a copy of the original
958-
assert idx._cache == shallow_copy._cache
959-
assert idx._cache is not shallow_copy._cache
960-
# cache values should reference the same object
961-
for key, val in idx._cache.items():
962-
assert shallow_copy._cache[key] is val, key
952+
assert shallow_copy._cache is idx._cache
953+
954+
shallow_copy = idx._shallow_copy(idx._data)
955+
assert shallow_copy._cache is not idx._cache
956+
assert shallow_copy._cache == {}

0 commit comments

Comments
 (0)