Skip to content

Commit 7b892bd

Browse files
committed
API: Honor copy for dict-input in DataFrame
Closes pandas-dev#32960
1 parent 80ba4c4 commit 7b892bd

File tree

4 files changed

+48
-14
lines changed

4 files changed

+48
-14
lines changed

pandas/core/frame.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -359,7 +359,12 @@ class DataFrame(NDFrame):
359359
dtype : dtype, default None
360360
Data type to force. Only a single dtype is allowed. If None, infer.
361361
copy : bool, default False
362-
Copy data from inputs. Only affects DataFrame / 2d ndarray input.
362+
Copy data from inputs. This only applies to specific cases.
363+
364+
* `data` is a DataFrame or 2D NumPy array
365+
* `data` is a dict with at most one column per NumPy dtype.
366+
367+
For all other cases, zero-copy construction cannot be ensured.
363368
364369
See Also
365370
--------
@@ -456,7 +461,7 @@ def __init__(
456461
)
457462

458463
elif isinstance(data, dict):
459-
mgr = init_dict(data, index, columns, dtype=dtype)
464+
mgr = init_dict(data, index, columns, dtype=dtype, copy=copy)
460465
elif isinstance(data, ma.MaskedArray):
461466
import numpy.ma.mrecords as mrecords
462467

pandas/core/internals/construction.py

+12-6
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ def arrays_to_mgr(
6464
columns,
6565
dtype: Optional[DtypeObj] = None,
6666
verify_integrity: bool = True,
67+
copy: bool = False,
6768
):
6869
"""
6970
Segregate Series based on type and coerce into matrices.
@@ -80,7 +81,7 @@ def arrays_to_mgr(
8081
index = ensure_index(index)
8182

8283
# don't force a copy by default (copy=False): the values get jammed into an ndarray anyway
83-
arrays = _homogenize(arrays, index, dtype)
84+
arrays = _homogenize(arrays, index, dtype, copy=copy)
8485

8586
columns = ensure_index(columns)
8687
else:
@@ -234,7 +235,9 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
234235
return create_block_manager_from_blocks(block_values, [columns, index])
235236

236237

237-
def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):
238+
def init_dict(
239+
data: Dict, index, columns, dtype: Optional[DtypeObj] = None, copy: bool = False
240+
):
238241
"""
239242
Segregate Series based on type and coerce into matrices.
240243
Needs to handle a lot of exceptional cases.
@@ -272,6 +275,7 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):
272275
keys = list(data.keys())
273276
columns = data_names = Index(keys)
274277
arrays = [com.maybe_iterable_to_list(data[k]) for k in keys]
278+
# breakpoint()
275279
# GH#24096 need copy to be deep for datetime64tz case
276280
# TODO: See if we can avoid these copies
277281
arrays = [
@@ -280,7 +284,7 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):
280284
arrays = [
281285
arr if not is_datetime64tz_dtype(arr) else arr.copy() for arr in arrays
282286
]
283-
return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype)
287+
return arrays_to_mgr(arrays, data_names, index, columns, dtype=dtype, copy=copy)
284288

285289

286290
# ---------------------------------------------------------------------
@@ -326,14 +330,16 @@ def convert(v):
326330
return values
327331

328332

329-
def _homogenize(data, index, dtype: Optional[DtypeObj]):
333+
def _homogenize(data, index, dtype: Optional[DtypeObj], copy: bool = False):
330334
oindex = None
331335
homogenized = []
332336

333337
for val in data:
334338
if isinstance(val, ABCSeries):
335339
if dtype is not None:
336-
val = val.astype(dtype)
340+
val = val.astype(dtype, copy=copy)
341+
elif copy:
342+
val = val.copy()
337343
if val.index is not index:
338344
# Forces alignment. No need to copy data since we
339345
# are putting it into an ndarray later
@@ -349,7 +355,7 @@ def _homogenize(data, index, dtype: Optional[DtypeObj]):
349355
val = dict(val)
350356
val = lib.fast_multiget(val, oindex._values, default=np.nan)
351357
val = sanitize_array(
352-
val, index, dtype=dtype, copy=False, raise_cast_failure=False
358+
val, index, dtype=dtype, copy=copy, raise_cast_failure=False
353359
)
354360

355361
homogenized.append(val)

pandas/core/internals/managers.py

+7-4
Original file line numberDiff line numberDiff line change
@@ -1817,10 +1817,13 @@ def _shape_compat(x):
18171817

18181818
first = arrays[0]
18191819
shape = (len(arrays),) + _shape_compat(first)
1820-
1821-
stacked = np.empty(shape, dtype=dtype)
1822-
for i, arr in enumerate(arrays):
1823-
stacked[i] = _asarray_compat(arr)
1820+
if len(arrays) == 1:
1821+
# allow for 0-copy construction from dict
1822+
stacked = _asarray_compat(first).reshape(shape)
1823+
else:
1824+
stacked = np.empty(shape, dtype=dtype)
1825+
for i, arr in enumerate(arrays):
1826+
stacked[i] = _asarray_compat(arr)
18241827

18251828
return stacked, placement
18261829

pandas/tests/frame/test_constructors.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -1909,12 +1909,16 @@ def test_constructor_ndarray_copy(self, float_frame):
19091909
assert not (df.values[6] == 6).all()
19101910

19111911
def test_constructor_series_copy(self, float_frame):
1912-
series = float_frame._series
1912+
series = float_frame._series.copy()
1913+
1914+
df = DataFrame({"A": series["A"]}, copy=True)
1915+
df["A"][:] = 5
1916+
assert not (series["A"] == 5).all()
19131917

19141918
df = DataFrame({"A": series["A"]})
19151919
df["A"][:] = 5
19161920

1917-
assert not (series["A"] == 5).all()
1921+
assert (series["A"] == 5).all()
19181922

19191923
def test_constructor_with_nas(self):
19201924
# GH 5016
@@ -2679,3 +2683,19 @@ def test_construction_from_set_raises(self):
26792683
msg = "Set type is unordered"
26802684
with pytest.raises(TypeError, match=msg):
26812685
pd.DataFrame({"a": {1, 2, 3}})
2686+
2687+
2688+
@pytest.mark.parametrize("copy", [False, True])
2689+
def test_dict_nocopy(copy):
2690+
a = np.array([1, 2])
2691+
b = pd.array([1, 2])
2692+
df = pd.DataFrame({"a": a, "b": b}, copy=copy)
2693+
df.iloc[0, 0] = 0
2694+
df.iloc[0, 1] = 0
2695+
2696+
if copy:
2697+
assert a[0] == 1
2698+
assert b[0] == 1
2699+
else:
2700+
assert a[0] == 0
2701+
assert b[0] == 0

0 commit comments

Comments
 (0)