diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst index 9b1ad658d4666..c52a956146fc2 100644 --- a/doc/source/whatsnew/v1.1.2.rst +++ b/doc/source/whatsnew/v1.1.2.rst @@ -32,6 +32,7 @@ Bug fixes - Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`) - Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should be ``""`` (:issue:`35712`) - Bug in :meth:`Float64Index.__contains__` incorrectly raising ``TypeError`` instead of returning ``False`` (:issue:`35788`) +- Bug in :class:`DataFrame` indexing returning an incorrect :class:`Series` in some cases when the series has been altered and a cache not invalidated (:issue:`36051`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 3bad2d6dd18b9..7a5ba69902dfa 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -3315,6 +3315,10 @@ def _maybe_update_cacher( if len(self) == len(ref): # otherwise, either self or ref has swapped in new arrays ref._maybe_cache_changed(cacher[0], self) + else: + # GH#33675 we have swapped in a new array, so parent + # reference to self is now invalid + ref._item_cache.pop(cacher[0], None) if verify_is_copy: self._check_setitem_copy(stacklevel=5, t="referant") diff --git a/pandas/tests/frame/test_missing.py b/pandas/tests/frame/test_missing.py index 9bf5d24085697..b4f91590e09d1 100644 --- a/pandas/tests/frame/test_missing.py +++ b/pandas/tests/frame/test_missing.py @@ -135,13 +135,20 @@ def test_drop_and_dropna_caching(self): df2 = df.copy() df["A"].dropna() tm.assert_series_equal(df["A"], original) - return_value = df["A"].dropna(inplace=True) - tm.assert_series_equal(df["A"], expected) + + ser = df["A"] + return_value = ser.dropna(inplace=True) + tm.assert_series_equal(ser, expected) + tm.assert_series_equal(df["A"], original) 
assert return_value is None + df2["A"].drop([1]) tm.assert_series_equal(df2["A"], original) - return_value = df2["A"].drop([1], inplace=True) - tm.assert_series_equal(df2["A"], original.drop([1])) + + ser = df2["A"] + return_value = ser.drop([1], inplace=True) + tm.assert_series_equal(ser, original.drop([1])) + tm.assert_series_equal(df2["A"], original) assert return_value is None def test_dropna_corner(self, float_frame): diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index fa5fe5ba5c384..9910ef1b04b1a 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -81,6 +81,21 @@ def test_setitem_cache_updating(self): tm.assert_frame_equal(out, expected) tm.assert_series_equal(out["A"], expected["A"]) + def test_altering_series_clears_parent_cache(self): + # GH #33675 + df = pd.DataFrame([[1, 2], [3, 4]], index=["a", "b"], columns=["A", "B"]) + ser = df["A"] + + assert "A" in df._item_cache + + # Adding a new entry to ser swaps in a new array, so "A" needs to + # be removed from df._item_cache + ser["c"] = 5 + assert len(ser) == 3 + assert "A" not in df._item_cache + assert df["A"] is not ser + assert len(df["A"]) == 2 + class TestChaining: def test_setitem_chained_setfault(self):