Skip to content

Commit 6861fc5

Browse files
PERF/BUG: ensure we store contiguous arrays in DataFrame(ndarray) for ArrayManager (#44562)
1 parent 35740b7 commit 6861fc5

File tree

4 files changed

+58
-26
lines changed

4 files changed

+58
-26
lines changed

pandas/core/frame.py

+15-7
Original file line numberDiff line numberDiff line change
@@ -593,13 +593,6 @@ def __init__(
593593
copy: bool | None = None,
594594
):
595595

596-
if copy is None:
597-
if isinstance(data, dict) or data is None:
598-
# retain pre-GH#38939 default behavior
599-
copy = True
600-
else:
601-
copy = False
602-
603596
if data is None:
604597
data = {}
605598
if dtype is not None:
@@ -618,6 +611,21 @@ def __init__(
618611

619612
manager = get_option("mode.data_manager")
620613

614+
if copy is None:
615+
if isinstance(data, dict):
616+
# retain pre-GH#38939 default behavior
617+
copy = True
618+
elif (
619+
manager == "array"
620+
and isinstance(data, (np.ndarray, ExtensionArray))
621+
and data.ndim == 2
622+
):
623+
# INFO(ArrayManager) by default copy the 2D input array to get
624+
# contiguous 1D arrays
625+
copy = True
626+
else:
627+
copy = False
628+
621629
if isinstance(data, (BlockManager, ArrayManager)):
622630
mgr = self._init_mgr(
623631
data, axes={"index": index, "columns": columns}, dtype=dtype, copy=copy

pandas/core/internals/construction.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,10 @@ def ndarray_to_mgr(
290290
if not len(values) and columns is not None and len(columns):
291291
values = np.empty((0, 1), dtype=object)
292292

293+
# if the array preparation does a copy -> avoid this for ArrayManager,
294+
# since the copy is done on conversion to 1D arrays
295+
copy_on_sanitize = False if typ == "array" else copy
296+
293297
vdtype = getattr(values, "dtype", None)
294298
if is_1d_only_ea_dtype(vdtype) or isinstance(dtype, ExtensionDtype):
295299
# GH#19157
@@ -324,7 +328,7 @@ def ndarray_to_mgr(
324328
else:
325329
# by definition an array here
326330
# the dtypes will be coerced to a single dtype
327-
values = _prep_ndarray(values, copy=copy)
331+
values = _prep_ndarray(values, copy=copy_on_sanitize)
328332

329333
if dtype is not None and not is_dtype_equal(values.dtype, dtype):
330334
shape = values.shape
@@ -334,7 +338,7 @@ def ndarray_to_mgr(
334338
rcf = not (is_integer_dtype(dtype) and values.dtype.kind == "f")
335339

336340
values = sanitize_array(
337-
flat, None, dtype=dtype, copy=copy, raise_cast_failure=rcf
341+
flat, None, dtype=dtype, copy=copy_on_sanitize, raise_cast_failure=rcf
338342
)
339343

340344
values = values.reshape(shape)
@@ -363,6 +367,9 @@ def ndarray_to_mgr(
363367
values = ensure_wrapped_if_datetimelike(values)
364368
arrays = [values[:, i] for i in range(values.shape[1])]
365369

370+
if copy:
371+
arrays = [arr.copy() for arr in arrays]
372+
366373
return ArrayManager(arrays, [index, columns], verify_integrity=False)
367374

368375
values = values.T

pandas/tests/frame/methods/test_values.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -226,8 +226,8 @@ def test_values_lcd(self, mixed_float_frame, mixed_int_frame):
226226

227227

228228
class TestPrivateValues:
229-
def test_private_values_dt64tz(self, request):
230-
229+
@td.skip_array_manager_invalid_test
230+
def test_private_values_dt64tz(self):
231231
dta = date_range("2000", periods=4, tz="US/Central")._data.reshape(-1, 1)
232232

233233
df = DataFrame(dta, columns=["A"])

pandas/tests/frame/test_constructors.py

+32-15
Original file line numberDiff line numberDiff line change
@@ -264,12 +264,17 @@ def test_constructor_dtype_nocast_view_dataframe(self):
264264
should_be_view[0][0] = 99
265265
assert df.values[0, 0] == 99
266266

267-
@td.skip_array_manager_invalid_test # TODO(ArrayManager) keep view on 2D array?
268-
def test_constructor_dtype_nocast_view_2d_array(self):
269-
df = DataFrame([[1, 2]])
270-
should_be_view = DataFrame(df.values, dtype=df[0].dtype)
271-
should_be_view[0][0] = 97
272-
assert df.values[0, 0] == 97
267+
def test_constructor_dtype_nocast_view_2d_array(self, using_array_manager):
268+
df = DataFrame([[1, 2], [3, 4]], dtype="int64")
269+
if not using_array_manager:
270+
should_be_view = DataFrame(df.values, dtype=df[0].dtype)
271+
should_be_view[0][0] = 97
272+
assert df.values[0, 0] == 97
273+
else:
274+
# INFO(ArrayManager) DataFrame(ndarray) doesn't necessarily preserve
275+
# a view on the array to ensure contiguous 1D arrays
276+
df2 = DataFrame(df.values, dtype=df[0].dtype)
277+
assert df2._mgr.arrays[0].flags.c_contiguous
273278

274279
@td.skip_array_manager_invalid_test
275280
def test_1d_object_array_does_not_copy(self):
@@ -2111,17 +2116,29 @@ def test_constructor_frame_copy(self, float_frame):
21112116
assert (cop["A"] == 5).all()
21122117
assert not (float_frame["A"] == 5).all()
21132118

2114-
# TODO(ArrayManager) keep view on 2D array?
2115-
@td.skip_array_manager_not_yet_implemented
2116-
def test_constructor_ndarray_copy(self, float_frame):
2117-
df = DataFrame(float_frame.values)
2119+
def test_constructor_ndarray_copy(self, float_frame, using_array_manager):
2120+
if not using_array_manager:
2121+
df = DataFrame(float_frame.values)
21182122

2119-
float_frame.values[5] = 5
2120-
assert (df.values[5] == 5).all()
2123+
float_frame.values[5] = 5
2124+
assert (df.values[5] == 5).all()
21212125

2122-
df = DataFrame(float_frame.values, copy=True)
2123-
float_frame.values[6] = 6
2124-
assert not (df.values[6] == 6).all()
2126+
df = DataFrame(float_frame.values, copy=True)
2127+
float_frame.values[6] = 6
2128+
assert not (df.values[6] == 6).all()
2129+
else:
2130+
arr = float_frame.values.copy()
2131+
# default: copy to ensure contiguous arrays
2132+
df = DataFrame(arr)
2133+
assert df._mgr.arrays[0].flags.c_contiguous
2134+
arr[0, 0] = 100
2135+
assert df.iloc[0, 0] != 100
2136+
2137+
# manually specify copy=False
2138+
df = DataFrame(arr, copy=False)
2139+
assert not df._mgr.arrays[0].flags.c_contiguous
2140+
arr[0, 0] = 1000
2141+
assert df.iloc[0, 0] == 1000
21252142

21262143
# TODO(ArrayManager) keep view on Series?
21272144
@td.skip_array_manager_not_yet_implemented

0 commit comments

Comments
 (0)