From 376199432dd7f83a23f5921fcfbabd18fc38fd1b Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 9 Jan 2022 10:21:53 -0800 Subject: [PATCH] BUG: frame[x].loc[y] inconsistent with frame.at[x, y] GH#22372 --- doc/source/whatsnew/v1.5.0.rst | 1 + pandas/core/internals/managers.py | 7 ++++--- pandas/tests/indexing/test_at.py | 23 +++++++++++++++++++++++ pandas/tests/internals/test_internals.py | 5 ++++- 4 files changed, 32 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index e723918ad8b4b..f738a99d2c896 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -152,6 +152,7 @@ Interval Indexing ^^^^^^^^ +- Bug in :meth:`loc.__getitem__` with a list of keys causing an internal inconsistency that could lead to a disconnect between ``frame.at[x, y]`` vs ``frame[y].loc[x]`` (:issue:`22372`) - Bug in :meth:`Series.__setitem__` with a non-integer :class:`Index` when using an integer key to set a value that cannot be set inplace where a ``ValueError`` was raised insead of casting to a common dtype (:issue:`45070`) - diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 0a9461c6a0ce3..136353848f361 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -671,9 +671,6 @@ def reindex_indexer( result.axes[axis] = new_axis return result - if consolidate: - self._consolidate_inplace() - # some axes don't allow reindexing with dups if not allow_dups: self.axes[axis]._validate_can_reindex(indexer) @@ -1681,6 +1678,10 @@ def _consolidate_check(self) -> None: self._known_consolidated = True def _consolidate_inplace(self) -> None: + # In general, _consolidate_inplace should only be called via + # DataFrame._consolidate_inplace, otherwise we will fail to invalidate + # the DataFrame's _item_cache. The exception is for newly-created + # BlockManager objects not yet attached to a DataFrame. if not self.is_consolidated(): self.blocks = tuple(_consolidate(self.blocks)) self._is_consolidated = True diff --git a/pandas/tests/indexing/test_at.py b/pandas/tests/indexing/test_at.py index d6be817ab6f77..a2f02378d061a 100644 --- a/pandas/tests/indexing/test_at.py +++ b/pandas/tests/indexing/test_at.py @@ -48,6 +48,29 @@ def test_selection_methods_of_assigned_col(): class TestAtSetItem: + def test_at_setitem_item_cache_cleared(self): + # GH#22372 Note the multi-step construction is necessary to trigger + # the original bug. pandas/issues/22372#issuecomment-413345309 + df = DataFrame(index=[0]) + df["x"] = 1 + df["cost"] = 2 + + # accessing df["cost"] adds "cost" to the _item_cache + df["cost"] + + # This loc[[0]] lookup used to call _consolidate_inplace at the + # BlockManager level, which failed to clear the _item_cache + df.loc[[0]] + + df.at[0, "x"] = 4 + df.at[0, "cost"] = 789 + + expected = DataFrame({"x": [4], "cost": 789}, index=[0]) + tm.assert_frame_equal(df, expected) + + # And in particular, check that the _item_cache has updated correctly. + tm.assert_series_equal(df["cost"], expected["cost"]) + def test_at_setitem_mixed_index_assignment(self): # GH#19860 ser = Series([1, 2, 3, 4, 5], index=["a", "b", "c", 1, 2]) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index a5ba684a37edf..2ca22c76c5ca0 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -722,7 +722,10 @@ def test_reindex_items(self): mgr = create_mgr("a: f8; b: i8; c: f8; d: i8; e: f8; f: bool; g: f8-2") reindexed = mgr.reindex_axis(["g", "c", "a", "d"], axis=0) - assert reindexed.nblocks == 2 + # reindex_axis does not consolidate_inplace, as that risks failing to + # invalidate _item_cache + assert not reindexed.is_consolidated() + tm.assert_index_equal(reindexed.items, Index(["g", "c", "a", "d"])) tm.assert_almost_equal( mgr.iget(6).internal_values(), reindexed.iget(0).internal_values()