Skip to content

Commit 6d4e407

Browse files
PERF: MultiIndex.memory_usage shouldn't trigger the index engine
Ignore the index engine when it isn't already cached.
1 parent b111ac6 commit 6d4e407

File tree

4 files changed

+32
-4
lines changed

4 files changed

+32
-4
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -342,6 +342,7 @@ Performance improvements
342342
- Performance improvement in ``DataFrameGroupBy.__len__`` and ``SeriesGroupBy.__len__`` (:issue:`57595`)
343343
- Performance improvement in indexing operations for string dtypes (:issue:`56997`)
344344
- Performance improvement in unary methods on a :class:`RangeIndex` returning a :class:`RangeIndex` instead of an :class:`Index` when possible. (:issue:`57825`)
345+
- Performance improvement in :meth:`MultiIndex.memory_usage` to ignore the index engine when it isn't already cached. (:issue:`58385`)
345346

346347
.. ---------------------------------------------------------------------------
347348
.. _whatsnew_300.bug_fixes:

pandas/core/indexes/base.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -4841,8 +4841,9 @@ def _from_join_target(self, result: np.ndarray) -> ArrayLike:
48414841
def memory_usage(self, deep: bool = False) -> int:
48424842
result = self._memory_usage(deep=deep)
48434843

4844-
# include our engine hashtable
4845-
result += self._engine.sizeof(deep=deep)
4844+
# include our engine hashtable, only if it's already cached
4845+
if "_engine" in self._cache:
4846+
result += self._engine.sizeof(deep=deep)
48464847
return result
48474848

48484849
@final

pandas/core/indexes/multi.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1391,8 +1391,9 @@ def _nbytes(self, deep: bool = False) -> int:
13911391
names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
13921392
result = level_nbytes + label_nbytes + names_nbytes
13931393

1394-
# include our engine hashtable
1395-
result += self._engine.sizeof(deep=deep)
1394+
# include our engine hashtable, only if it's already cached
1395+
if "_engine" in self._cache:
1396+
result += self._engine.sizeof(deep=deep)
13961397
return result
13971398

13981399
# --------------------------------------------------------------------

pandas/tests/base/test_misc.py

+25
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,31 @@ def test_memory_usage_components_narrow_series(any_real_numpy_dtype):
142142
assert total_usage == non_index_usage + index_usage
143143

144144

145+
def test_memory_usage_doesnt_trigger_engine(index):
146+
index._cache.clear()
147+
assert "_engine" not in index._cache
148+
149+
res_without_engine = index.memory_usage()
150+
assert "_engine" not in index._cache
151+
152+
# explicitly load and cache the engine
153+
_ = index._engine
154+
assert "_engine" in index._cache
155+
156+
res_with_engine = index.memory_usage()
157+
158+
# the engine doesn't affect the result even when initialized with values, because
159+
# index._engine.sizeof() doesn't consider the content of index._engine.values
160+
assert res_with_engine == res_without_engine
161+
162+
if len(index) == 0:
163+
assert res_without_engine == 0
164+
assert res_with_engine == 0
165+
else:
166+
assert res_without_engine > 0
167+
assert res_with_engine > 0
168+
169+
145170
def test_searchsorted(request, index_or_series_obj):
146171
# numpy.searchsorted calls obj.searchsorted under the hood.
147172
# See gh-12238

0 commit comments

Comments
 (0)