-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: avoid creating reference cycle on indexing (#15746) #17956
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
9600371
ce2c6b5
28c5056
39feabd
8e28ff8
8f07a17
01ce596
efe021d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# cython: profile=False | ||
|
||
cdef class _NDFrameIndexerBase: | ||
''' | ||
A base class for _NDFrameIndexer for fast instantiation and attribute | ||
access. | ||
''' | ||
cdef public object obj, name, _ndim | ||
|
||
def __init__(self, name, obj): | ||
self.obj = obj | ||
self.name = name | ||
self._ndim = None | ||
|
||
@property | ||
def ndim(self): | ||
# Delay `ndim` instantiation until required as reading it | ||
# from `obj` isn't entirely cheap. | ||
ndim = self._ndim | ||
if ndim is None: | ||
ndim = self._ndim = self.obj.ndim | ||
return ndim |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ | |
from pandas.core.common import (is_bool_indexer, _asarray_tuplesafe, | ||
is_null_slice, is_full_slice, | ||
_values_from_object) | ||
from pandas._libs.indexing import _NDFrameIndexerBase | ||
|
||
|
||
# the supported indexers | ||
|
@@ -85,19 +86,14 @@ class IndexingError(Exception): | |
pass | ||
|
||
|
||
class _NDFrameIndexer(object): | ||
class _NDFrameIndexer(_NDFrameIndexerBase): | ||
_valid_types = None | ||
_exception = KeyError | ||
axis = None | ||
|
||
def __init__(self, obj, name): | ||
self.obj = obj | ||
self.ndim = obj.ndim | ||
self.name = name | ||
|
||
def __call__(self, axis=None): | ||
# we need to return a copy of ourselves | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. did you reverse these on purpose? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, so that we can use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ok this is not a big deal to reverse. |
||
new_self = self.__class__(self.obj, self.name) | ||
new_self = self.__class__(self.name, self.obj) | ||
|
||
if axis is not None: | ||
axis = self.obj._get_axis_number(axis) | ||
|
@@ -1321,7 +1317,7 @@ class _IXIndexer(_NDFrameIndexer): | |
|
||
""" | ||
|
||
def __init__(self, obj, name): | ||
def __init__(self, name, obj): | ||
|
||
_ix_deprecation_warning = textwrap.dedent(""" | ||
.ix is deprecated. Please use | ||
|
@@ -1333,7 +1329,7 @@ def __init__(self, obj, name): | |
|
||
warnings.warn(_ix_deprecation_warning, | ||
DeprecationWarning, stacklevel=3) | ||
super(_IXIndexer, self).__init__(obj, name) | ||
super(_IXIndexer, self).__init__(name, obj) | ||
|
||
def _has_valid_type(self, key, axis): | ||
if isinstance(key, slice): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,6 +5,7 @@ | |
|
||
import pytest | ||
|
||
import weakref | ||
from warnings import catch_warnings | ||
from datetime import datetime | ||
|
||
|
@@ -881,6 +882,14 @@ def test_partial_boolean_frame_indexing(self): | |
columns=list('ABC')) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_no_reference_cycle(self): | ||
df = pd.DataFrame({'a': [0, 1], 'b': [2, 3]}) | ||
for name in ('loc', 'iloc', 'ix', 'at', 'iat'): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would it make sense to do a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is no need for that, the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was only wondering if the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, In that case the test would fail most of the time. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why most of the time? It is very unlikely, so it would fail rarely, right? You can check that this assertion erroneously passes with pandas 0.20.3 where there are definitely cyclic references by forcing an "accidental" GC:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You said « the weakref can become None "by chance" ». If that's the only reason the test succeeds, then it would fail most of the time, since the chance of a GC happening between two consecutive opcodes is slim. I'm not sure what your code snippet is supposed to prove. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What I'm trying to say: Checking for |
||
getattr(df, name) | ||
wr = weakref.ref(df) | ||
del df | ||
assert wr() is None | ||
|
||
|
||
class TestSeriesNoneCoercion(object): | ||
EXPECTED_RESULTS = [ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
hmm I think you need to leave this. but only an asv will tell us. can you run the asv's for indexing (and add one which replicates the issue that initiated this PR).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok, I ran the benchmarks. The only significant difference is that
.ix
becomes slower, because the deprecation warning is emitter at each instantiation. Since it's deprecated anyway, I'm not sure performance is important.