Skip to content

Commit fd415e1

Browse files
jbrockmendel authored and feefladder committed
BUG: DataFrame.copy not consolidating (pandas-dev#42579)
1 parent d8fb384 commit fd415e1

File tree

5 files changed

+29
-2
lines changed

5 files changed

+29
-2
lines changed

doc/source/whatsnew/v1.3.1.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ Bug fixes
3737
~~~~~~~~~
3838
- Fixed bug in :meth:`DataFrame.transpose` dropping values when the DataFrame had an Extension Array dtype and a duplicate index (:issue:`42380`)
3939
- Fixed bug in :meth:`DataFrame.to_xml` raising ``KeyError`` when called with ``index=False`` and an offset index (:issue:`42458`)
40-
-
40+
- Fixed bug in :meth:`DataFrame.copy` failing to consolidate blocks in the result (:issue:`42579`)
4141

4242
.. ---------------------------------------------------------------------------
4343

pandas/core/internals/managers.py

+3
Original file line numberDiff line numberDiff line change
@@ -598,6 +598,9 @@ def copy_func(ax):
598598

599599
res = self.apply("copy", deep=deep)
600600
res.axes = new_axes
601+
602+
if deep:
603+
res._consolidate_inplace()
601604
return res
602605

603606
def consolidate(self: T) -> T:

pandas/core/reshape/reshape.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,8 @@ def _unstack_multiple(data, clocs, fill_value=None):
402402

403403
return result
404404

405-
dummy = data.copy()
405+
# GH#42579 deep=False to avoid consolidating
406+
dummy = data.copy(deep=False)
406407
dummy.index = dummy_index
407408

408409
unstacked = dummy.unstack("__placeholder__", fill_value=fill_value)

pandas/tests/frame/methods/test_copy.py

+20
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
import numpy as np
12
import pytest
23

4+
import pandas.util._test_decorators as td
5+
36
from pandas import DataFrame
47
import pandas._testing as tm
58

@@ -41,3 +44,20 @@ def test_copy(self, float_frame, float_string_frame):
4144
# copy objects
4245
copy = float_string_frame.copy()
4346
assert copy._mgr is not float_string_frame._mgr
47+
48+
@td.skip_array_manager_invalid_test
49+
def test_copy_consolidates(self):
50+
# GH#42477
51+
df = DataFrame(
52+
{
53+
"a": np.random.randint(0, 100, size=55),
54+
"b": np.random.randint(0, 100, size=55),
55+
}
56+
)
57+
58+
for i in range(0, 10):
59+
df.loc[:, f"n_{i}"] = np.random.randint(0, 100, size=55)
60+
61+
assert len(df._mgr.blocks) == 11
62+
result = df.copy()
63+
assert len(result._mgr.blocks) == 1

pandas/tests/internals/test_internals.py

+3
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,9 @@ def test_copy(self, mgr):
461461
# DatetimeTZBlock has DatetimeIndex values
462462
assert cp_blk.values._data.base is blk.values._data.base
463463

464+
# copy(deep=True) consolidates, so the block-wise assertions will
465+
# fail if mgr is not consolidated
466+
mgr._consolidate_inplace()
464467
cp = mgr.copy(deep=True)
465468
for blk, cp_blk in zip(mgr.blocks, cp.blocks):
466469

0 commit comments

Comments
 (0)