From 960037196a426b7427da87783109b75d5416a914 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 23 Oct 2017 17:47:59 +0200 Subject: [PATCH 1/7] ENH: avoid creating reference cycle on indexing (#15746) --- pandas/_libs/lib.pyx | 22 ++++++++++++++++++++++ pandas/core/generic.py | 15 ++------------- pandas/core/indexing.py | 14 +++++--------- pandas/tests/indexing/test_indexing.py | 9 +++++++++ 4 files changed, 38 insertions(+), 22 deletions(-) diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e7e92b7ae987a..78631294282bc 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1839,5 +1839,27 @@ cdef class BlockPlacement: return self._as_slice +cdef class _NDFrameIndexerBase: + ''' + A base class for _NDFrameIndexer for fast instantiation and attribute + access. + ''' + cdef public object obj, name, _ndim + + def __init__(self, name, obj): + self.obj = obj + self.name = name + self._ndim = None + + @property + def ndim(self): + # Delay `ndim` instantiation until required as reading it + # from `obj` isn't entirely cheap. + ndim = self._ndim + if ndim is None: + ndim = self._ndim = self.obj.ndim + return ndim + + include "reduce.pyx" include "inference.pyx" diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3e5828fc59932..23798dd574f8f 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1841,23 +1841,12 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True, @classmethod def _create_indexer(cls, name, indexer): """Create an indexer like _name in the class.""" + from functools import partial if getattr(cls, name, None) is None: - iname = '_%s' % name - setattr(cls, iname, None) - - def _indexer(self): - i = getattr(self, iname) - if i is None: - i = indexer(self, name) - setattr(self, iname, i) - return i - + _indexer = partial(indexer, name) setattr(cls, name, property(_indexer, doc=indexer.__doc__)) - # add to our internal names set - cls._internal_names_set.add(iname) - def get(self, key, default=None): """ Get item from object for given key (DataFrame column, Panel slice, diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index 654c3510b7cf7..de00a67f17eb5 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -23,6 +23,7 @@ from pandas.core.common import (is_bool_indexer, _asarray_tuplesafe, is_null_slice, is_full_slice, _values_from_object) +from pandas._libs.lib import _NDFrameIndexerBase # the supported indexers @@ -85,19 +86,14 @@ class IndexingError(Exception): pass -class _NDFrameIndexer(object): +class _NDFrameIndexer(_NDFrameIndexerBase): _valid_types = None _exception = KeyError axis = None - def __init__(self, obj, name): - self.obj = obj - self.ndim = obj.ndim - self.name = name - def __call__(self, axis=None): # we need to return a copy of ourselves - new_self = self.__class__(self.obj, self.name) + new_self = self.__class__(self.name, self.obj) if axis is not None: axis = self.obj._get_axis_number(axis) @@ -1321,7 +1317,7 @@ class _IXIndexer(_NDFrameIndexer): """ - def __init__(self, obj, name): + def __init__(self, name, obj): _ix_deprecation_warning = textwrap.dedent(""" .ix is deprecated. Please use @@ -1333,7 +1329,7 @@ def __init__(self, obj, name): warnings.warn(_ix_deprecation_warning, DeprecationWarning, stacklevel=3) - super(_IXIndexer, self).__init__(obj, name) + super(_IXIndexer, self).__init__(name, obj) def _has_valid_type(self, key, axis): if isinstance(key, slice): diff --git a/pandas/tests/indexing/test_indexing.py b/pandas/tests/indexing/test_indexing.py index d64ed98243d72..ec62023e75db4 100644 --- a/pandas/tests/indexing/test_indexing.py +++ b/pandas/tests/indexing/test_indexing.py @@ -5,6 +5,7 @@ import pytest +import weakref from warnings import catch_warnings from datetime import datetime @@ -881,6 +882,14 @@ def test_partial_boolean_frame_indexing(self): columns=list('ABC')) tm.assert_frame_equal(result, expected) + def test_no_reference_cycle(self): + df = pd.DataFrame({'a': [0, 1], 'b': [2, 3]}) + for name in ('loc', 'iloc', 'ix', 'at', 'iat'): + getattr(df, name) + wr = weakref.ref(df) + del df + assert wr() is None + class TestSeriesNoneCoercion(object): EXPECTED_RESULTS = [ From ce2c6b511a98e45b0b3808a1ae02f0158ddc7a39 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Mon, 23 Oct 2017 18:14:12 +0200 Subject: [PATCH 2/7] Use a dedicated indexing.pyx module in pandas._libs --- pandas/_libs/indexing.pyx | 22 ++++++++++++++++++++++ pandas/_libs/lib.pyx | 22 ---------------------- pandas/core/generic.py | 5 ++--- pandas/core/indexing.py | 2 +- setup.py | 2 ++ 5 files changed, 27 insertions(+), 26 deletions(-) create mode 100644 pandas/_libs/indexing.pyx diff --git a/pandas/_libs/indexing.pyx b/pandas/_libs/indexing.pyx new file mode 100644 index 0000000000000..fb707a3c3e5e2 --- /dev/null +++ b/pandas/_libs/indexing.pyx @@ -0,0 +1,22 @@ +# cython: profile=False + +cdef class _NDFrameIndexerBase: + ''' + A base class for _NDFrameIndexer for fast instantiation and attribute + access. + ''' + cdef public object obj, name, _ndim + + def __init__(self, name, obj): + self.obj = obj + self.name = name + self._ndim = None + + @property + def ndim(self): + # Delay `ndim` instantiation until required as reading it + # from `obj` isn't entirely cheap. + ndim = self._ndim + if ndim is None: + ndim = self._ndim = self.obj.ndim + return ndim diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index 78631294282bc..e7e92b7ae987a 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -1839,27 +1839,5 @@ cdef class BlockPlacement: return self._as_slice -cdef class _NDFrameIndexerBase: - ''' - A base class for _NDFrameIndexer for fast instantiation and attribute - access. - ''' - cdef public object obj, name, _ndim - - def __init__(self, name, obj): - self.obj = obj - self.name = name - self._ndim = None - - @property - def ndim(self): - # Delay `ndim` instantiation until required as reading it - # from `obj` isn't entirely cheap. - ndim = self._ndim - if ndim is None: - ndim = self._ndim = self.obj.ndim - return ndim - - include "reduce.pyx" include "inference.pyx" diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 23798dd574f8f..df2d927ee8ac2 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -1,5 +1,6 @@ # pylint: disable=W0231,E1101 import collections +import functools import warnings import operator import weakref @@ -1841,10 +1842,8 @@ def to_latex(self, buf=None, columns=None, col_space=None, header=True, @classmethod def _create_indexer(cls, name, indexer): """Create an indexer like _name in the class.""" - from functools import partial - if getattr(cls, name, None) is None: - _indexer = partial(indexer, name) + _indexer = functools.partial(indexer, name) setattr(cls, name, property(_indexer, doc=indexer.__doc__)) def get(self, key, default=None): diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index de00a67f17eb5..a8f09ab35bc3c 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -23,7 +23,7 @@ from pandas.core.common import (is_bool_indexer, _asarray_tuplesafe, is_null_slice, is_full_slice, _values_from_object) -from pandas._libs.lib import _NDFrameIndexerBase +from pandas._libs.indexing import _NDFrameIndexerBase # the supported indexers diff --git a/setup.py b/setup.py index 158ee9493b6ac..2b4cc0ce019e2 100755 --- a/setup.py +++ b/setup.py @@ -335,6 +335,7 @@ class CheckSDist(sdist_class): 'pandas/_libs/index.pyx', 'pandas/_libs/algos.pyx', 'pandas/_libs/join.pyx', + 'pandas/_libs/indexing.pyx', 'pandas/_libs/interval.pyx', 'pandas/_libs/hashing.pyx', 'pandas/_libs/testing.pyx', @@ -519,6 +520,7 @@ def pxd(name): 'depends': _pxi_dep['join']}, '_libs.reshape': {'pyxfile': '_libs/reshape', 'depends': _pxi_dep['reshape']}, + '_libs.indexing': {'pyxfile': '_libs/indexing'}, '_libs.interval': {'pyxfile': '_libs/interval', 'pxdfiles': ['_libs/hashtable'], 'depends': _pxi_dep['interval']}, From 28c5056b42cf8f0820b65194a4408d8104621673 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 24 Oct 2017 00:14:31 +0200 Subject: [PATCH 3/7] Fix failed test on Python 2.7 --- pandas/tests/indexing/test_ix.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py index dc9a591ee3101..c39ad44849052 100644 --- a/pandas/tests/indexing/test_ix.py +++ b/pandas/tests/indexing/test_ix.py @@ -21,7 +21,8 @@ def test_ix_deprecation(self): df = DataFrame({'A': [1, 2, 3]}) with tm.assert_produces_warning(DeprecationWarning, - check_stacklevel=False): + check_stacklevel=False, + clear=[pd.core.indexing]): df.ix[1, 'A'] def test_ix_loc_setitem_consistency(self): From 39feabdba0f92e7eccff5e74cd338cc42a9ed4a8 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 24 Oct 2017 00:34:51 +0200 Subject: [PATCH 4/7] Actually (hopefully) fix test failure on 2.7 --- pandas/core/indexing.py | 2 +- pandas/tests/indexing/test_ix.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py index a8f09ab35bc3c..b2720078635a4 100755 --- a/pandas/core/indexing.py +++ b/pandas/core/indexing.py @@ -1328,7 +1328,7 @@ def __init__(self, name, obj): http://pandas.pydata.org/pandas-docs/stable/indexing.html#ix-indexer-is-deprecated""") # noqa warnings.warn(_ix_deprecation_warning, - DeprecationWarning, stacklevel=3) + DeprecationWarning, stacklevel=2) super(_IXIndexer, self).__init__(name, obj) def _has_valid_type(self, key, axis): diff --git a/pandas/tests/indexing/test_ix.py b/pandas/tests/indexing/test_ix.py index c39ad44849052..dc9a591ee3101 100644 --- a/pandas/tests/indexing/test_ix.py +++ b/pandas/tests/indexing/test_ix.py @@ -21,8 +21,7 @@ def test_ix_deprecation(self): df = DataFrame({'A': [1, 2, 3]}) with tm.assert_produces_warning(DeprecationWarning, - check_stacklevel=False, - clear=[pd.core.indexing]): + check_stacklevel=False): df.ix[1, 'A'] def test_ix_loc_setitem_consistency(self): From 8e28ff81d9763837170b13c4a2d89bbf2212a187 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Tue, 24 Oct 2017 10:35:17 +0200 Subject: [PATCH 5/7] Add index lookup benchmark --- asv_bench/benchmarks/indexing.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index d941ef20dc7ac..551b35e04d806 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -287,3 +287,13 @@ def setup(self): def time_subset(self): self.p.ix[(self.inds, self.inds, self.inds)] + + +class IndexerLookup(object): + goal_time = 0.2 + + def setup(self): + self.s = Series(range(10)) + + def time_lookup_iloc(self): + self.s.iloc From 8f07a17652386ebed87ef785624cdec053104ab1 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Wed, 25 Oct 2017 14:26:56 +0200 Subject: [PATCH 6/7] Add similar benchmarks for `.iloc` and `.ix` --- asv_bench/benchmarks/indexing.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/asv_bench/benchmarks/indexing.py b/asv_bench/benchmarks/indexing.py index 551b35e04d806..f3e7ebbbd33e8 100644 --- a/asv_bench/benchmarks/indexing.py +++ b/asv_bench/benchmarks/indexing.py @@ -297,3 +297,9 @@ def setup(self): def time_lookup_iloc(self): self.s.iloc + + def time_lookup_ix(self): + self.s.ix + + def time_lookup_loc(self): + self.s.loc From efe021daebcca3d706aa8ced3eb90c2e12c41fb4 Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Fri, 27 Oct 2017 13:09:02 +0200 Subject: [PATCH 7/7] Add whatsnew entry --- doc/source/whatsnew/v0.22.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 53b052a955b45..847ae4f0fbf6b 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -67,7 +67,7 @@ Removal of prior version deprecations/changes Performance Improvements ~~~~~~~~~~~~~~~~~~~~~~~~ -- +- Indexers on Series or DataFrame no longer create a reference cycle (:issue:`17956`) - -