Skip to content

Commit c3a2135

Browse files
GianlucaFicarelli authored and pmhatre1 committed
PERF: MultiIndex.memory_usage shouldn't trigger the index engine (pandas-dev#58385)
* PERF: MultiIndex.memory_usage shouldn't trigger the index engine

  Ignore the index engine when it isn't already cached.

* Move test, sort whatsnew
1 parent 24e552c commit c3a2135

File tree

4 files changed

+31
-4
lines changed

4 files changed

+31
-4
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -333,6 +333,7 @@ Performance improvements
333333
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
334334
- Performance improvement in :meth:`Index.to_frame` returning a :class:`RangeIndex` columns of a :class:`Index` when possible. (:issue:`58018`)
335335
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
336+
- Performance improvement in :meth:`MultiIndex.memory_usage` to ignore the index engine when it isn't already cached. (:issue:`58385`)
336337
- Performance improvement in :meth:`RangeIndex.__getitem__` with a boolean mask or integers returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57588`)
337338
- Performance improvement in :meth:`RangeIndex.append` when appending the same index (:issue:`57252`)
338339
- Performance improvement in :meth:`RangeIndex.argmin` and :meth:`RangeIndex.argmax` (:issue:`57823`)

pandas/core/indexes/base.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -4844,8 +4844,9 @@ def _from_join_target(self, result: np.ndarray) -> ArrayLike:
48444844
def memory_usage(self, deep: bool = False) -> int:
48454845
result = self._memory_usage(deep=deep)
48464846

4847-
# include our engine hashtable
4848-
result += self._engine.sizeof(deep=deep)
4847+
# include our engine hashtable, only if it's already cached
4848+
if "_engine" in self._cache:
4849+
result += self._engine.sizeof(deep=deep)
48494850
return result
48504851

48514852
@final

pandas/core/indexes/multi.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -1391,8 +1391,9 @@ def _nbytes(self, deep: bool = False) -> int:
13911391
names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
13921392
result = level_nbytes + label_nbytes + names_nbytes
13931393

1394-
# include our engine hashtable
1395-
result += self._engine.sizeof(deep=deep)
1394+
# include our engine hashtable, only if it's already cached
1395+
if "_engine" in self._cache:
1396+
result += self._engine.sizeof(deep=deep)
13961397
return result
13971398

13981399
# --------------------------------------------------------------------

pandas/tests/indexes/test_old_base.py

+24
Original file line number | Diff line number | Diff line change
@@ -326,6 +326,30 @@ def test_memory_usage(self, index):
326326
if index.inferred_type == "object":
327327
assert result3 > result2
328328

329+
def test_memory_usage_doesnt_trigger_engine(self, index):
330+
index._cache.clear()
331+
assert "_engine" not in index._cache
332+
333+
res_without_engine = index.memory_usage()
334+
assert "_engine" not in index._cache
335+
336+
# explicitly load and cache the engine
337+
_ = index._engine
338+
assert "_engine" in index._cache
339+
340+
res_with_engine = index.memory_usage()
341+
342+
# the empty engine doesn't affect the result even when initialized with values,
343+
# because engine.sizeof() doesn't consider the content of engine.values
344+
assert res_with_engine == res_without_engine
345+
346+
if len(index) == 0:
347+
assert res_without_engine == 0
348+
assert res_with_engine == 0
349+
else:
350+
assert res_without_engine > 0
351+
assert res_with_engine > 0
352+
329353
def test_argsort(self, index):
330354
if isinstance(index, CategoricalIndex):
331355
pytest.skip(f"{type(self).__name__} separately tested")

0 commit comments

Comments
 (0)