Skip to content

Commit d71ca72

Browse files
Backport PR #42579: BUG: DataFrame.copy not consolidating (#42679)
Co-authored-by: jbrockmendel <[email protected]>
1 parent a96cd88 commit d71ca72

File tree

5 files changed

+29
-2
lines changed

5 files changed

+29
-2
lines changed

doc/source/whatsnew/v1.3.1.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ Bug fixes
3737
~~~~~~~~~
3838
- Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`)
3939
- Fixed bug in :meth:`DataFrame.to_xml` raising ``KeyError`` when called with ``index=False`` and an offset index (:issue:`42458`)
40-
-
40+
- Fixed bug in :meth:`DataFrame.copy` failing to consolidate blocks in the result (:issue:`42579`)
4141

4242
.. ---------------------------------------------------------------------------
4343

pandas/core/internals/managers.py

+3
Original file line numberDiff line numberDiff line change
@@ -594,6 +594,9 @@ def copy_func(ax):
594594

595595
res = self.apply("copy", deep=deep)
596596
res.axes = new_axes
597+
598+
if deep:
599+
res._consolidate_inplace()
597600
return res
598601

599602
def consolidate(self: T) -> T:

pandas/core/reshape/reshape.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,8 @@ def _unstack_multiple(data, clocs, fill_value=None):
399399

400400
return result
401401

402-
dummy = data.copy()
402+
# GH#42579 deep=False to avoid consolidating
403+
dummy = data.copy(deep=False)
403404
dummy.index = dummy_index
404405

405406
unstacked = dummy.unstack("__placeholder__", fill_value=fill_value)

pandas/tests/frame/methods/test_copy.py

+20
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
import numpy as np
12
import pytest
23

4+
import pandas.util._test_decorators as td
5+
36
from pandas import DataFrame
47
import pandas._testing as tm
58

@@ -41,3 +44,20 @@ def test_copy(self, float_frame, float_string_frame):
4144
# copy objects
4245
copy = float_string_frame.copy()
4346
assert copy._mgr is not float_string_frame._mgr
47+
48+
@td.skip_array_manager_invalid_test
49+
def test_copy_consolidates(self):
50+
# GH#42477
51+
df = DataFrame(
52+
{
53+
"a": np.random.randint(0, 100, size=55),
54+
"b": np.random.randint(0, 100, size=55),
55+
}
56+
)
57+
58+
for i in range(0, 10):
59+
df.loc[:, f"n_{i}"] = np.random.randint(0, 100, size=55)
60+
61+
assert len(df._mgr.blocks) == 11
62+
result = df.copy()
63+
assert len(result._mgr.blocks) == 1

pandas/tests/internals/test_internals.py

+3
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,9 @@ def test_copy(self, mgr):
461461
# DatetimeTZBlock has DatetimeIndex values
462462
assert cp_blk.values._data.base is blk.values._data.base
463463

464+
# copy(deep=True) consolidates, so the block-wise assertions will
465+
# fail if mgr is not consolidated
466+
mgr._consolidate_inplace()
464467
cp = mgr.copy(deep=True)
465468
for blk, cp_blk in zip(mgr.blocks, cp.blocks):
466469

0 commit comments

Comments
 (0)