From 42ff002cb5db7d568bd7f07574fb37b554faf9f2 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 16 Jul 2021 15:06:42 -0700 Subject: [PATCH 1/3] BUG: DataFrame.copy not consolidating --- pandas/core/internals/managers.py | 1 + pandas/tests/frame/methods/test_copy.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index dca6ddf703446..2a71591cef4d8 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -598,6 +598,7 @@ def copy_func(ax): res = self.apply("copy", deep=deep) res.axes = new_axes + res._consolidate_inplace() return res def consolidate(self: T) -> T: diff --git a/pandas/tests/frame/methods/test_copy.py b/pandas/tests/frame/methods/test_copy.py index be52cf55fccb2..1c0b0755e7d94 100644 --- a/pandas/tests/frame/methods/test_copy.py +++ b/pandas/tests/frame/methods/test_copy.py @@ -1,5 +1,8 @@ +import numpy as np import pytest +import pandas.util._test_decorators as td + from pandas import DataFrame import pandas._testing as tm @@ -41,3 +44,20 @@ def test_copy(self, float_frame, float_string_frame): # copy objects copy = float_string_frame.copy() assert copy._mgr is not float_string_frame._mgr + + @td.skip_array_manager_invalid_test + def test_copy_consolidates(self): + # GH#42477 + df = DataFrame( + { + "a": np.random.randint(0, 100, size=55), + "b": np.random.randint(0, 100, size=55), + } + ) + + for i in range(0, 10): + df.loc[:, f"n_{i}"] = np.random.randint(0, 100, size=55) + + assert len(df._mgr.blocks) == 11 + result = df.copy() + assert len(result._mgr.blocks) == 1 From 0e8c42c03ace6ec30f4336de8af247369a486875 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 19 Jul 2021 19:26:50 -0700 Subject: [PATCH 2/3] whatsnew --- doc/source/whatsnew/v1.3.1.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.1.rst b/doc/source/whatsnew/v1.3.1.rst index 2ce146660f98c..ad6beb1cd6cf1 100644 --- 
a/doc/source/whatsnew/v1.3.1.rst +++ b/doc/source/whatsnew/v1.3.1.rst @@ -33,7 +33,7 @@ Bug fixes ~~~~~~~~~ - Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`) - Fixed bug in :meth:`DataFrame.to_xml` raising ``KeyError`` when called with ``index=False`` and an offset index (:issue:`42458`) -- +- Fixed bug in :meth:`DataFrame.copy` failing to consolidate blocks in the result (:issue:`42579`) .. --------------------------------------------------------------------------- From ea51a58b53aa03765a42c56af3d1bd497e4458b7 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 20 Jul 2021 16:08:37 -0700 Subject: [PATCH 3/3] unstack deep=False --- pandas/core/internals/managers.py | 4 +++- pandas/core/reshape/reshape.py | 3 ++- pandas/tests/internals/test_internals.py | 3 +++ 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 2a71591cef4d8..a888649d3ed98 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -598,7 +598,9 @@ def copy_func(ax): res = self.apply("copy", deep=deep) res.axes = new_axes - res._consolidate_inplace() + + if deep: + res._consolidate_inplace() return res def consolidate(self: T) -> T: diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 12ab08c4e30a1..06af3b278245f 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -402,7 +402,8 @@ def _unstack_multiple(data, clocs, fill_value=None): return result - dummy = data.copy() + # GH#42579 deep=False to avoid consolidating + dummy = data.copy(deep=False) dummy.index = dummy_index unstacked = dummy.unstack("__placeholder__", fill_value=fill_value) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 0f4a30cfa9cf9..38a6209283080 100644 --- a/pandas/tests/internals/test_internals.py +++ 
b/pandas/tests/internals/test_internals.py @@ -461,6 +461,9 @@ def test_copy(self, mgr): # DatetimeTZBlock has DatetimeIndex values assert cp_blk.values._data.base is blk.values._data.base + # copy(deep=True) consolidates, so the block-wise assertions will + # fail if mgr is not consolidated + mgr._consolidate_inplace() cp = mgr.copy(deep=True) for blk, cp_blk in zip(mgr.blocks, cp.blocks):