Skip to content

Commit 8b6942f

Browse files
crepererum authored and TomAugspurger committed
PERF: break reference cycle in Index._engine (#27607)
Fixes #27585
1 parent 78c6843 commit 8b6942f

File tree

4 files changed

+32
-1
lines changed

4 files changed

+32
-1
lines changed

asv_bench/benchmarks/index_object.py

+18
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
1+
import gc
12
import numpy as np
23
import pandas.util.testing as tm
34
from pandas import (
@@ -225,4 +226,21 @@ def time_intersection_both_duplicate(self, N):
225226
self.intv.intersection(self.intv2)
226227

227228

229+
class GC:
230+
params = [1, 2, 5]
231+
232+
def create_use_drop(self):
233+
idx = Index(list(range(1000 * 1000)))
234+
idx._engine
235+
236+
def peakmem_gc_instances(self, N):
237+
try:
238+
gc.disable()
239+
240+
for _ in range(N):
241+
self.create_use_drop()
242+
finally:
243+
gc.enable()
244+
245+
228246
from .pandas_vb_common import setup # noqa: F401

doc/source/whatsnew/v0.25.1.rst

+1
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,7 @@ Indexing
8383
^^^^^^^^
8484

8585
- Bug in partial-string indexing returning a NumPy array rather than a ``Series`` when indexing with a scalar like ``.loc['2015']`` (:issue:`27516`)
86+
- Break reference cycle involving :class:`Index` to allow garbage collection of :class:`Index` objects without running the GC. (:issue:`27585`)
8687
-
8788
-
8889

pandas/core/indexes/base.py

+5-1
Original file line number | Diff line number | Diff line change
@@ -691,7 +691,11 @@ def _cleanup(self):
691691
@cache_readonly
692692
def _engine(self):
693693
# property, for now, slow to look up
694-
return self._engine_type(lambda: self._ndarray_values, len(self))
694+
695+
# to avoid a reference cycle, bind `_ndarray_values` to a local variable, so
696+
# `self` is not captured by the lambda.
697+
_ndarray_values = self._ndarray_values
698+
return self._engine_type(lambda: _ndarray_values, len(self))
695699

696700
# --------------------------------------------------------------------
697701
# Array-Like Methods

pandas/tests/indexes/test_base.py

+8
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
from collections import defaultdict
22
from datetime import datetime, timedelta
3+
import gc
34
from io import StringIO
45
import math
56
import operator
@@ -2424,6 +2425,13 @@ def test_deprecated_contains(self):
24242425
with tm.assert_produces_warning(FutureWarning):
24252426
index.contains(1)
24262427

2428+
def test_engine_reference_cycle(self):
2429+
# https://github.com/pandas-dev/pandas/issues/27585
2430+
index = pd.Index([1, 2, 3])
2431+
nrefs_pre = len(gc.get_referrers(index))
2432+
index._engine
2433+
assert len(gc.get_referrers(index)) == nrefs_pre
2434+
24272435

24282436
class TestMixedIntIndex(Base):
24292437
# Mostly the tests from common.py for which the results differ

0 commit comments

Comments
 (0)