Skip to content

Backport PR #27607 on branch 0.25.x (BUG: break reference cycle in Index._engine) #27828

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions asv_bench/benchmarks/index_object.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import gc
import numpy as np
import pandas.util.testing as tm
from pandas import (
Expand Down Expand Up @@ -225,4 +226,21 @@ def time_intersection_both_duplicate(self, N):
self.intv.intersection(self.intv2)


class GC:
    """Benchmark peak memory of ``Index`` objects dropped while the GC is off.

    Each run builds and discards ``N`` large indexes with the cyclic garbage
    collector disabled, so any object kept alive only by a reference cycle
    is not reclaimed during the run.
    """

    # Values of ``N``: how many indexes are created and dropped per run.
    params = [1, 2, 5]

    def create_use_drop(self):
        """Build a one-million-element ``Index``, access ``_engine``, drop it."""
        idx = Index(list(range(1000 * 1000)))
        # Attribute access only; the returned engine is deliberately discarded.
        idx._engine

    def peakmem_gc_instances(self, N):
        """Call ``create_use_drop`` ``N`` times with the collector disabled.

        The collector's previous state is restored afterwards: it is only
        re-enabled if it was enabled on entry, so a caller that had already
        turned the GC off does not get it switched back on behind its back.
        """
        gc_was_enabled = gc.isenabled()
        gc.disable()
        try:
            for _ in range(N):
                self.create_use_drop()
        finally:
            if gc_was_enabled:
                gc.enable()


from .pandas_vb_common import setup # noqa: F401
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.25.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ Indexing
^^^^^^^^

- Bug in partial-string indexing returning a NumPy array rather than a ``Series`` when indexing with a scalar like ``.loc['2015']`` (:issue:`27516`)
- Break reference cycle involving :class:`Index` to allow garbage collection of :class:`Index` objects without running the GC. (:issue:`27585`)
-
-

Expand Down
6 changes: 5 additions & 1 deletion pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -710,7 +710,11 @@ def _cleanup(self):
@cache_readonly
def _engine(self):
# property, for now, slow to look up
return self._engine_type(lambda: self._ndarray_values, len(self))

# to avoid a reference cycle, bind `_ndarray_values` to a local variable, so
# `self` is not passed into the lambda.
_ndarray_values = self._ndarray_values
return self._engine_type(lambda: _ndarray_values, len(self))

# --------------------------------------------------------------------
# Array-Like Methods
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/indexes/test_base.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from collections import defaultdict
from datetime import datetime, timedelta
import gc
from io import StringIO
import math
import operator
Expand Down Expand Up @@ -2424,6 +2425,13 @@ def test_deprecated_contains(self):
with tm.assert_produces_warning(FutureWarning):
index.contains(1)

def test_engine_reference_cycle(self):
# https://github.com/pandas-dev/pandas/issues/27585
index = pd.Index([1, 2, 3])
nrefs_pre = len(gc.get_referrers(index))
index._engine
assert len(gc.get_referrers(index)) == nrefs_pre


class TestMixedIntIndex(Base):
# Mostly the tests from common.py for which the results differ
Expand Down