Skip to content

Commit 7bd91e8

Browse files
meeseeksmachine authored and
jreback committed
Backport PR pandas-dev#27840: PERF: Break reference cycle for all Index types (pandas-dev#27918)
1 parent b154327 commit 7bd91e8

File tree

6 files changed

+20
-14
lines changed

6 files changed

+20
-14
lines changed

doc/source/whatsnew/v0.25.1.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ Indexing
8383
^^^^^^^^
8484

8585
- Bug in partial-string indexing returning a NumPy array rather than a ``Series`` when indexing with a scalar like ``.loc['2015']`` (:issue:`27516`)
86-
- Break reference cycle involving :class:`Index` to allow garbage collection of :class:`Index` objects without running the GC. (:issue:`27585`)
86+
- Break reference cycle involving :class:`Index` and other index classes to allow garbage collection of index objects without running the GC. (:issue:`27585`, :issue:`27840`)
8787
- Fix regression in assigning values to a single column of a DataFrame with a ``MultiIndex`` columns (:issue:`27841`).
8888
-
8989

pandas/core/indexes/base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,7 @@ def _cleanup(self):
711711
def _engine(self):
712712
# property, for now, slow to look up
713713

714-
# to avoid a refernce cycle, bind `_ndarray_values` to a local variable, so
714+
# to avoid a reference cycle, bind `_ndarray_values` to a local variable, so
715715
# `self` is not passed into the lambda.
716716
_ndarray_values = self._ndarray_values
717717
return self._engine_type(lambda: _ndarray_values, len(self))

pandas/core/indexes/category.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -445,9 +445,11 @@ def argsort(self, *args, **kwargs):
445445

446446
@cache_readonly
447447
def _engine(self):
448-
449-
# we are going to look things up with the codes themselves
450-
return self._engine_type(lambda: self.codes, len(self))
448+
# we are going to look things up with the codes themselves.
449+
# To avoid a reference cycle, bind `codes` to a local variable, so
450+
# `self` is not passed into the lambda.
451+
codes = self.codes
452+
return self._engine_type(lambda: codes, len(self))
451453

452454
# introspection
453455
@cache_readonly

pandas/core/indexes/period.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from datetime import datetime, timedelta
22
import warnings
3+
import weakref
34

45
import numpy as np
56

@@ -438,7 +439,9 @@ def _formatter_func(self):
438439

439440
@cache_readonly
440441
def _engine(self):
441-
return self._engine_type(lambda: self, len(self))
442+
# To avoid a reference cycle, pass a weakref of self to _engine_type.
443+
period = weakref.ref(self)
444+
return self._engine_type(period, len(self))
442445

443446
@Appender(_index_shared_docs["contains"])
444447
def __contains__(self, key):

pandas/tests/indexes/common.py

+9
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import gc
2+
13
import numpy as np
24
import pytest
35

@@ -908,3 +910,10 @@ def test_is_unique(self):
908910
# multiple NA should not be unique
909911
index_na_dup = index_na.insert(0, np.nan)
910912
assert index_na_dup.is_unique is False
913+
914+
def test_engine_reference_cycle(self):
915+
# GH27585
916+
index = self.create_index()
917+
nrefs_pre = len(gc.get_referrers(index))
918+
index._engine
919+
assert len(gc.get_referrers(index)) == nrefs_pre

pandas/tests/indexes/test_base.py

-8
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
from collections import defaultdict
22
from datetime import datetime, timedelta
3-
import gc
43
from io import StringIO
54
import math
65
import operator
@@ -2425,13 +2424,6 @@ def test_deprecated_contains(self):
24252424
with tm.assert_produces_warning(FutureWarning):
24262425
index.contains(1)
24272426

2428-
def test_engine_reference_cycle(self):
2429-
# https://github.com/pandas-dev/pandas/issues/27585
2430-
index = pd.Index([1, 2, 3])
2431-
nrefs_pre = len(gc.get_referrers(index))
2432-
index._engine
2433-
assert len(gc.get_referrers(index)) == nrefs_pre
2434-
24352427

24362428
class TestMixedIntIndex(Base):
24372429
# Mostly the tests from common.py for which the results differ

0 commit comments

Comments
 (0)