Skip to content

Commit bc4f149

Browse files
authored
CoW: Add lazy copy mechanism to DataFrame constructor for dict of Index (#52947)
1 parent d06f2d3 commit bc4f149

File tree

6 files changed

+47
-8
lines changed

6 files changed

+47
-8
lines changed

doc/source/whatsnew/v2.1.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@ Copy-on-Write improvements
2020
^^^^^^^^^^^^^^^^^^^^^^^^^^
2121

2222
- Setting a :class:`Series` into a :class:`DataFrame` now creates a lazy copy instead of a deep copy (:issue:`53142`)
23+
- The :class:`DataFrame` constructor, when constructing a DataFrame from a dictionary
24+
of Index objects and specifying ``copy=False``, will now use a lazy copy
25+
of those Index objects for the columns of the DataFrame (:issue:`52947`)
2326

2427
.. _whatsnew_210.enhancements.enhancement2:
2528

pandas/core/indexes/base.py

+5-2
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@
2222

2323
import numpy as np
2424

25-
from pandas._config import get_option
25+
from pandas._config import (
26+
get_option,
27+
using_copy_on_write,
28+
)
2629

2730
from pandas._libs import (
2831
NaT,
@@ -1635,7 +1638,7 @@ def to_frame(
16351638

16361639
if name is lib.no_default:
16371640
name = self._get_level_names()
1638-
result = DataFrame({name: self._values.copy()})
1641+
result = DataFrame({name: self}, copy=not using_copy_on_write())
16391642

16401643
if index:
16411644
result.index = self

pandas/core/internals/construction.py

+10-4
Original file line numberDiff line numberDiff line change
@@ -460,13 +460,19 @@ def dict_to_mgr(
460460
keys = list(data.keys())
461461
columns = Index(keys) if keys else default_index(0)
462462
arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
463-
arrays = [arr if not isinstance(arr, Index) else arr._data for arr in arrays]
464463

465464
if copy:
466465
if typ == "block":
467466
# We only need to copy arrays that will not get consolidated, i.e.
468467
# only EA arrays
469-
arrays = [x.copy() if isinstance(x, ExtensionArray) else x for x in arrays]
468+
arrays = [
469+
x.copy()
470+
if isinstance(x, ExtensionArray)
471+
else x.copy(deep=True)
472+
if isinstance(x, Index)
473+
else x
474+
for x in arrays
475+
]
470476
else:
471477
# dtype check to exclude e.g. range objects, scalars
472478
arrays = [x.copy() if hasattr(x, "dtype") else x for x in arrays]
@@ -573,10 +579,10 @@ def _homogenize(
573579
refs: list[Any] = []
574580

575581
for val in data:
576-
if isinstance(val, ABCSeries):
582+
if isinstance(val, (ABCSeries, Index)):
577583
if dtype is not None:
578584
val = val.astype(dtype, copy=False)
579-
if val.index is not index:
585+
if isinstance(val, ABCSeries) and val.index is not index:
580586
# Forces alignment. No need to copy data since we
581587
# are putting it into an ndarray later
582588
val = val.reindex(index, copy=False)

pandas/tests/copy_view/index/test_index.py

+14
Original file line numberDiff line numberDiff line change
@@ -153,3 +153,17 @@ def test_infer_objects(using_copy_on_write):
153153
view_.iloc[0, 0] = "aaaa"
154154
if using_copy_on_write:
155155
tm.assert_index_equal(idx, expected, check_names=False)
156+
157+
158+
def test_index_to_frame(using_copy_on_write):
    """Check that writing into ``Index.to_frame()`` output never alters the Index.

    With Copy-on-Write the frame column is expected to share the Index's
    buffer and be tracked as referenced; otherwise it must not share memory.
    In both modes a write into the frame has to leave the Index unchanged.
    """
    idx = Index([1, 2, 3], name="a")
    snapshot = idx.copy(deep=True)

    frame = idx.to_frame()
    shares_buffer = np.shares_memory(get_array(frame, "a"), idx._values)

    if not using_copy_on_write:
        assert not shares_buffer
    else:
        assert shares_buffer
        assert not frame._mgr._has_no_reference(0)

    # Mutate the frame, then verify the source Index still equals its snapshot.
    frame.iloc[0, 0] = 100
    tm.assert_index_equal(idx, snapshot)

pandas/tests/copy_view/test_constructors.py

+12
Original file line numberDiff line numberDiff line change
@@ -340,3 +340,15 @@ def test_dataframe_from_records_with_dataframe(using_copy_on_write):
340340
tm.assert_frame_equal(df, df_orig)
341341
else:
342342
tm.assert_frame_equal(df, df2)
343+
344+
345+
def test_frame_from_dict_of_index(using_copy_on_write):
    """DataFrame built from a dict of Index with ``copy=False`` (GH#52947).

    The column is asserted to reuse the Index's buffer in both modes. With
    Copy-on-Write enabled the block must additionally report an outstanding
    reference, and a write into the frame must leave the Index unchanged.
    """
    idx = Index([1, 2, 3])
    expected = idx.copy(deep=True)
    df = DataFrame({"a": idx}, copy=False)
    assert np.shares_memory(get_array(df, "a"), idx._values)
    if using_copy_on_write:
        assert not df._mgr._has_no_reference(0)

        # The mutation check is only valid under CoW: the buffer is asserted
        # to be shared above, so without CoW the write below would presumably
        # land in the Index's data as well and the comparison would fail.
        df.iloc[0, 0] = 100
        tm.assert_index_equal(idx, expected)

pandas/tests/indexes/test_common.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131

3232
class TestCommon:
3333
@pytest.mark.parametrize("name", [None, "new_name"])
34-
def test_to_frame(self, name, index_flat):
34+
def test_to_frame(self, name, index_flat, using_copy_on_write):
3535
# see GH#15230, GH#22580
3636
idx = index_flat
3737

@@ -45,7 +45,8 @@ def test_to_frame(self, name, index_flat):
4545
assert df.index is idx
4646
assert len(df.columns) == 1
4747
assert df.columns[0] == idx_name
48-
assert df[idx_name].values is not idx.values
48+
if not using_copy_on_write:
49+
assert df[idx_name].values is not idx.values
4950

5051
df = idx.to_frame(index=False, name=idx_name)
5152
assert df.index is not idx

0 commit comments

Comments
 (0)