diff --git a/asv_bench/benchmarks/index_object.py b/asv_bench/benchmarks/index_object.py index 6541ddcb0397d..49834ae94cc38 100644 --- a/asv_bench/benchmarks/index_object.py +++ b/asv_bench/benchmarks/index_object.py @@ -1,3 +1,4 @@ +import gc import numpy as np import pandas.util.testing as tm from pandas import ( @@ -225,4 +226,21 @@ def time_intersection_both_duplicate(self, N): self.intv.intersection(self.intv2) +class GC: + params = [1, 2, 5] + + def create_use_drop(self): + idx = Index(list(range(1000 * 1000))) + idx._engine + + def peakmem_gc_instances(self, N): + try: + gc.disable() + + for _ in range(N): + self.create_use_drop() + finally: + gc.enable() + + from .pandas_vb_common import setup # noqa: F401 diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index f1d3f152e503d..443baa56374ca 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -83,6 +83,7 @@ Indexing ^^^^^^^^ - Bug in partial-string indexing returning a NumPy array rather than a ``Series`` when indexing with a scalar like ``.loc['2015']`` (:issue:`27516`) +- Break reference cycle involving :class:`Index` to allow garbage collection of :class:`Index` objects without running the GC. (:issue:`27585`) - - diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 17122d0981995..9297d02282a23 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -710,7 +710,11 @@ def _cleanup(self): @cache_readonly def _engine(self): # property, for now, slow to look up - return self._engine_type(lambda: self._ndarray_values, len(self)) + + # to avoid a reference cycle, bind `_ndarray_values` to a local variable, so + # `self` is not passed into the lambda. 
+ _ndarray_values = self._ndarray_values + return self._engine_type(lambda: _ndarray_values, len(self)) # -------------------------------------------------------------------- # Array-Like Methods diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index c40a9bce9385b..34d82525495fc 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -1,5 +1,6 @@ from collections import defaultdict from datetime import datetime, timedelta +import gc from io import StringIO import math import operator @@ -2424,6 +2425,13 @@ def test_deprecated_contains(self): with tm.assert_produces_warning(FutureWarning): index.contains(1) + def test_engine_reference_cycle(self): + # https://github.com/pandas-dev/pandas/issues/27585 + index = pd.Index([1, 2, 3]) + nrefs_pre = len(gc.get_referrers(index)) + index._engine + assert len(gc.get_referrers(index)) == nrefs_pre + class TestMixedIntIndex(Base): # Mostly the tests from common.py for which the results differ