From 18e24a93a9effed9fcfbf39ce11634e0007f80d8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 4 Feb 2020 13:25:59 -0600 Subject: [PATCH 1/2] PERF: Cache MultiIndex.levels (#31651) * PERF: Cache MultiIndex.levels Closes https://github.com/pandas-dev/pandas/issues/31648 * fixup tests --- pandas/core/indexes/multi.py | 29 ++++++++++++++-------- pandas/tests/indexes/multi/test_get_set.py | 2 +- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 75b96666080aa..6c125e17e5549 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -620,16 +620,6 @@ def from_frame(cls, df, sortorder=None, names=None): # -------------------------------------------------------------------- - @property - def levels(self): - result = [ - x._shallow_copy(name=name) for x, name in zip(self._levels, self._names) - ] - for level in result: - # disallow midx.levels[0].name = "foo" - level._no_setting_name = True - return FrozenList(result) - @property def _values(self): # We override here, since our parent uses _data, which we don't use. @@ -659,6 +649,22 @@ def array(self): "'MultiIndex.to_numpy()' to get a NumPy array of tuples." ) + # -------------------------------------------------------------------- + # Levels Methods + + @cache_readonly + def levels(self): + # Use cache_readonly to ensure that self.get_locs doesn't repeatedly + # create new IndexEngine + # https://github.com/pandas-dev/pandas/issues/31648 + result = [ + x._shallow_copy(name=name) for x, name in zip(self._levels, self._names) + ] + for level in result: + # disallow midx.levels[0].name = "foo" + level._no_setting_name = True + return FrozenList(result) + def _set_levels( self, levels, level=None, copy=False, validate=True, verify_integrity=False ): @@ -1227,6 +1233,9 @@ def _set_names(self, names, level=None, validate=True): ) self._names[lev] = name + # If .levels has been accessed, the names in our cache will be stale. + self._reset_cache() + names = property( fset=_set_names, fget=_get_names, doc="""\nNames of levels in MultiIndex.\n""" ) diff --git a/pandas/tests/indexes/multi/test_get_set.py b/pandas/tests/indexes/multi/test_get_set.py index 074072ae581b2..57d16a739c213 100644 --- a/pandas/tests/indexes/multi/test_get_set.py +++ b/pandas/tests/indexes/multi/test_get_set.py @@ -159,7 +159,7 @@ def test_set_levels_codes_directly(idx): minor_codes = [(x + 1) % 1 for x in minor_codes] new_codes = [major_codes, minor_codes] - msg = "can't set attribute" + msg = "[Cc]an't set attribute" with pytest.raises(AttributeError, match=msg): idx.levels = new_levels with pytest.raises(AttributeError, match=msg): From 103840e974a4c8548fccab86cabc6a3161bd94e8 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 4 Feb 2020 14:03:14 -0600 Subject: [PATCH 2/2] whatsnew --- doc/source/whatsnew/v1.0.1.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.1.rst b/doc/source/whatsnew/v1.0.1.rst index 92cd2038aabe6..f9c756b2518af 100644 --- a/doc/source/whatsnew/v1.0.1.rst +++ b/doc/source/whatsnew/v1.0.1.rst @@ -27,6 +27,7 @@ Fixed regressions - Fixed regression in objTOJSON.c fix return-type warning (:issue:`31463`) - Fixed regression in :meth:`qcut` when passed a nullable integer. (:issue:`31389`) - Fixed regression in assigning to a :class:`Series` using a nullable integer dtype (:issue:`31446`) +- Fixed performance regression when indexing a ``DataFrame`` or ``Series`` with a :class:`MultiIndex` for the index using a list of labels (:issue:`31648`) .. ---------------------------------------------------------------------------