Skip to content

Commit 37c9cea

Browse files
pitrou authored and jreback committed
ENH: avoid creating reference cycle on indexing (pandas-dev#15746) (pandas-dev#17956)
1 parent bdeadb9 commit 37c9cea

File tree

7 files changed, with 58 additions (+58) and 25 deletions (-25).

asv_bench/benchmarks/indexing.py

+16
Original file line number | Diff line number | Diff line change
@@ -287,3 +287,19 @@ def setup(self):
287287

288288
def time_subset(self):
289289
self.p.ix[(self.inds, self.inds, self.inds)]
290+
291+
292+
class IndexerLookup(object):
    """Benchmark the cost of looking up an indexer attribute on a Series.

    Each timed method only evaluates the attribute access (``.iloc``,
    ``.ix`` or ``.loc``); no element is actually indexed, so the timing
    reflects the accessor lookup itself.
    """

    goal_time = 0.2

    def setup(self):
        """Build the small Series the timed methods read from."""
        self.s = Series(range(10))

    def time_lookup_iloc(self):
        """Time evaluating ``Series.iloc``."""
        self.s.iloc

    def time_lookup_ix(self):
        """Time evaluating ``Series.ix``."""
        self.s.ix

    def time_lookup_loc(self):
        """Time evaluating ``Series.loc``."""
        self.s.loc

doc/source/whatsnew/v0.22.0.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ Removal of prior version deprecations/changes
6767
Performance Improvements
6868
~~~~~~~~~~~~~~~~~~~~~~~~
6969

70-
-
70+
- Indexers on Series or DataFrame no longer create a reference cycle (:issue:`17956`)
7171
-
7272
-
7373

pandas/_libs/indexing.pyx

+22
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,22 @@
1+
# cython: profile=False
2+
3+
cdef class _NDFrameIndexerBase:
    '''
    A base class for _NDFrameIndexer for fast instantiation and attribute
    access.

    Parameters
    ----------
    name : object
        Name of the indexer; stored as ``self.name``.
    obj : object
        The object being indexed; stored as ``self.obj``. Its ``ndim``
        attribute is read the first time the ``ndim`` property is
        accessed.

    Notes
    -----
    The constructor takes ``name`` first and ``obj`` second.
    '''
    # `obj` and `name` are set by __init__; `_ndim` caches `obj.ndim` and
    # stays None until the `ndim` property is first read.
    cdef public object obj, name, _ndim

    def __init__(self, name, obj):
        self.obj = obj
        self.name = name
        self._ndim = None

    @property
    def ndim(self):
        '''Return ``obj.ndim``, reading it from ``obj`` only once.'''
        # Delay `ndim` instantiation until required as reading it
        # from `obj` isn't entirely cheap.
        ndim = self._ndim
        if ndim is None:
            ndim = self._ndim = self.obj.ndim
        return ndim

pandas/core/generic.py

+2-14
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,6 @@
11
# pylint: disable=W0231,E1101
22
import collections
3+
import functools
34
import warnings
45
import operator
56
import weakref
@@ -1796,23 +1797,10 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True,
17961797
@classmethod
17971798
def _create_indexer(cls, name, indexer):
17981799
"""Create an indexer like _name in the class."""
1799-
18001800
if getattr(cls, name, None) is None:
1801-
iname = '_%s' % name
1802-
setattr(cls, iname, None)
1803-
1804-
def _indexer(self):
1805-
i = getattr(self, iname)
1806-
if i is None:
1807-
i = indexer(self, name)
1808-
setattr(self, iname, i)
1809-
return i
1810-
1801+
_indexer = functools.partial(indexer, name)
18111802
setattr(cls, name, property(_indexer, doc=indexer.__doc__))
18121803

1813-
# add to our internal names set
1814-
cls._internal_names_set.add(iname)
1815-
18161804
def get(self, key, default=None):
18171805
"""
18181806
Get item from object for given key (DataFrame column, Panel slice,

pandas/core/indexing.py

+6-10
Original file line number | Diff line number | Diff line change
@@ -23,6 +23,7 @@
2323
from pandas.core.common import (is_bool_indexer, _asarray_tuplesafe,
2424
is_null_slice, is_full_slice,
2525
_values_from_object)
26+
from pandas._libs.indexing import _NDFrameIndexerBase
2627

2728

2829
# the supported indexers
@@ -85,19 +86,14 @@ class IndexingError(Exception):
8586
pass
8687

8788

88-
class _NDFrameIndexer(object):
89+
class _NDFrameIndexer(_NDFrameIndexerBase):
8990
_valid_types = None
9091
_exception = KeyError
9192
axis = None
9293

93-
def __init__(self, obj, name):
94-
self.obj = obj
95-
self.ndim = obj.ndim
96-
self.name = name
97-
9894
def __call__(self, axis=None):
9995
# we need to return a copy of ourselves
100-
new_self = self.__class__(self.obj, self.name)
96+
new_self = self.__class__(self.name, self.obj)
10197

10298
if axis is not None:
10399
axis = self.obj._get_axis_number(axis)
@@ -1321,7 +1317,7 @@ class _IXIndexer(_NDFrameIndexer):
13211317
13221318
"""
13231319

1324-
def __init__(self, obj, name):
1320+
def __init__(self, name, obj):
13251321

13261322
_ix_deprecation_warning = textwrap.dedent("""
13271323
.ix is deprecated. Please use
@@ -1332,8 +1328,8 @@ def __init__(self, obj, name):
13321328
http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated""") # noqa
13331329

13341330
warnings.warn(_ix_deprecation_warning,
1335-
DeprecationWarning, stacklevel=3)
1336-
super(_IXIndexer, self).__init__(obj, name)
1331+
DeprecationWarning, stacklevel=2)
1332+
super(_IXIndexer, self).__init__(name, obj)
13371333

13381334
def _has_valid_type(self, key, axis):
13391335
if isinstance(key, slice):

pandas/tests/indexing/test_indexing.py

+9
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@
55

66
import pytest
77

8+
import weakref
89
from warnings import catch_warnings
910
from datetime import datetime
1011

@@ -881,6 +882,14 @@ def test_partial_boolean_frame_indexing(self):
881882
columns=list('ABC'))
882883
tm.assert_frame_equal(result, expected)
883884

885+
def test_no_reference_cycle(self):
    """Accessing any indexer must not keep the DataFrame alive.

    Each indexer attribute is looked up once, then the only strong
    reference to the frame is dropped; the weak reference must be dead
    immediately afterwards.
    """
    df = pd.DataFrame({'a': [0, 1], 'b': [2, 3]})
    for name in ('loc', 'iloc', 'at', 'iat'):
        getattr(df, name)
    # Creating the `.ix` indexer emits a DeprecationWarning; record it
    # here so it does not leak into the test run's output.
    with catch_warnings(record=True):
        getattr(df, 'ix')
    wr = weakref.ref(df)
    # No gc.collect() on purpose: a reference cycle would only be freed
    # by the collector, so a live weakref after `del` exposes it.
    del df
    assert wr() is None
892+
884893

885894
class TestSeriesNoneCoercion(object):
886895
EXPECTED_RESULTS = [

setup.py

+2
Original file line number | Diff line number | Diff line change
@@ -335,6 +335,7 @@ class CheckSDist(sdist_class):
335335
'pandas/_libs/index.pyx',
336336
'pandas/_libs/algos.pyx',
337337
'pandas/_libs/join.pyx',
338+
'pandas/_libs/indexing.pyx',
338339
'pandas/_libs/interval.pyx',
339340
'pandas/_libs/hashing.pyx',
340341
'pandas/_libs/testing.pyx',
@@ -519,6 +520,7 @@ def pxd(name):
519520
'depends': _pxi_dep['join']},
520521
'_libs.reshape': {'pyxfile': '_libs/reshape',
521522
'depends': _pxi_dep['reshape']},
523+
'_libs.indexing': {'pyxfile': '_libs/indexing'},
522524
'_libs.interval': {'pyxfile': '_libs/interval',
523525
'pxdfiles': ['_libs/hashtable'],
524526
'depends': _pxi_dep['interval']},

0 commit comments

Comments
 (0)