diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index b40f012f034b6..90f611c55e710 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -104,7 +104,7 @@ Timezones Numeric ^^^^^^^ - +- Bug in :meth:`DataFrame.quantile`, :meth:`DataFrame.sort_values` causing incorrect subsequent indexing behavior (:issue:`38351`) - - diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 93ab207d8ce12..0b3f1079cdb16 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -442,7 +442,6 @@ def apply( def quantile( self, axis: int = 0, - consolidate: bool = True, transposed: bool = False, interpolation="linear", qs=None, @@ -472,9 +471,6 @@ def quantile( # simplify some of the code here and in the blocks assert self.ndim >= 2 - if consolidate: - self._consolidate_inplace() - def get_axe(block, qs, axes): # Because Series dispatches to DataFrame, we will always have # block.ndim == 2 @@ -1455,7 +1451,6 @@ def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True """ Take items along any axis. """ - self._consolidate_inplace() indexer = ( np.arange(indexer.start, indexer.stop, indexer.step, dtype="int64") if isinstance(indexer, slice) @@ -1472,7 +1467,11 @@ def take(self, indexer, axis: int = 1, verify: bool = True, convert: bool = True new_labels = self.axes[axis].take(indexer) return self.reindex_indexer( - new_axis=new_labels, indexer=indexer, axis=axis, allow_dups=True + new_axis=new_labels, + indexer=indexer, + axis=axis, + allow_dups=True, + consolidate=False, ) def equals(self, other: object) -> bool: diff --git a/pandas/tests/frame/methods/test_quantile.py b/pandas/tests/frame/methods/test_quantile.py index 13e00c97d6f71..6ddba8b5e7064 100644 --- a/pandas/tests/frame/methods/test_quantile.py +++ b/pandas/tests/frame/methods/test_quantile.py @@ -517,3 +517,15 @@ def test_quantile_empty_no_columns(self): expected = DataFrame([], index=[0.5], columns=[]) expected.columns.name = "captain tightpants" tm.assert_frame_equal(result, expected) + + def test_quantile_item_cache(self): + # previous behavior incorrect retained an invalid _item_cache entry + df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"]) + df["D"] = df["A"] * 2 + ser = df["A"] + assert len(df._mgr.blocks) == 2 + + df.quantile(numeric_only=False) + ser.values[0] = 99 + + assert df.iloc[0, 0] == df["A"][0] diff --git a/pandas/tests/frame/methods/test_sort_values.py b/pandas/tests/frame/methods/test_sort_values.py index b94f54a4819c0..1bb969956e074 100644 --- a/pandas/tests/frame/methods/test_sort_values.py +++ b/pandas/tests/frame/methods/test_sort_values.py @@ -544,6 +544,18 @@ def test_sort_values_nat_na_position_default(self): result = expected.sort_values(["A", "date"]) tm.assert_frame_equal(result, expected) + def test_sort_values_item_cache(self): + # previous behavior incorrect retained an invalid _item_cache entry + df = DataFrame(np.random.randn(4, 3), columns=["A", "B", "C"]) + df["D"] = df["A"] * 2 + ser = df["A"] + assert len(df._mgr.blocks) == 2 + + df.sort_values(by="A") + ser.values[0] = 99 + + assert df.iloc[0, 0] == df["A"][0] + class TestDataFrameSortKey: # test key sorting (issue 27237) def test_sort_values_inplace_key(self, sort_by_key):